mirror of
https://github.com/deepseek-ai/3FS
synced 2025-06-26 18:16:45 +00:00
Initial commit
This commit is contained in:
2
src/meta/CMakeLists.txt
Normal file
2
src/meta/CMakeLists.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
# Declares the `meta` target through the project's target_add_lib helper; the names after `meta`
# are passed to that helper (presumably as link dependencies -- confirm in the helper definition).
target_add_lib(meta core-app core-user core-service fdb meta-fbs mgmtd-client storage-client memory-common analytics)
|
||||
# Declares the `meta_main` target from meta.cpp through target_add_bin, passing `meta` and
# `jemalloc` to the helper (presumably linked in -- confirm in the helper definition).
target_add_bin(meta_main "meta.cpp" meta jemalloc)
|
||||
133
src/meta/base/Config.h
Normal file
133
src/meta/base/Config.h
Normal file
@@ -0,0 +1,133 @@
|
||||
#pragma once
|
||||
|
||||
#include "analytics/StructuredTraceLog.h"
|
||||
#include "client/storage/StorageClient.h"
|
||||
#include "common/kv/TransactionRetry.h"
|
||||
#include "common/utils/ConfigBase.h"
|
||||
#include "common/utils/CoroutinesPool.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/PriorityCoroutinePool.h"
|
||||
#include "common/utils/Size.h"
|
||||
#include "core/user/UserCache.h"
|
||||
#include "meta/components/Distributor.h"
|
||||
#include "meta/components/Forward.h"
|
||||
#include "meta/components/SessionManager.h"
|
||||
#include "meta/event/Event.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
// Make kv::TransactionRetry usable unqualified in this namespace; Config::retry_transaction uses it.
using kv::TransactionRetry;
|
||||
|
||||
/// Settings consumed by the meta server garbage collector.
/// Every scalar item is declared hot-updatable; the declaration order is kept as-is.
struct GcConfig : ConfigBase<GcConfig> {
  CONFIG_HOT_UPDATED_ITEM(enable, true);
  CONFIG_HOT_UPDATED_ITEM(scan_interval, 200_ms);
  CONFIG_HOT_UPDATED_ITEM(scan_batch, 4096);
  CONFIG_HOT_UPDATED_ITEM(remove_chunks_batch_size, 32);
  CONFIG_HOT_UPDATED_ITEM(gc_file_delay, 5_min);
  CONFIG_HOT_UPDATED_ITEM(gc_file_concurrent, 32ul);
  CONFIG_HOT_UPDATED_ITEM(gc_directory_delay, 0_s);
  CONFIG_HOT_UPDATED_ITEM(gc_directory_concurrent, 4ul);
  CONFIG_HOT_UPDATED_ITEM(gc_directory_entry_batch, 32ul);
  CONFIG_HOT_UPDATED_ITEM(gc_directory_entry_concurrent, 4ul);
  CONFIG_HOT_UPDATED_ITEM(retry_delay, 10_min);
  // disable gc delay if free space is below 5%
  CONFIG_HOT_UPDATED_ITEM(gc_delay_free_space_threshold, 5);
  CONFIG_HOT_UPDATED_ITEM(check_session, true);
  CONFIG_HOT_UPDATED_ITEM(distributed_gc, true);  // random select a GC directory
  CONFIG_HOT_UPDATED_ITEM(txn_low_priority, false);

  // small file or large file
  CONFIG_HOT_UPDATED_ITEM(small_file_chunks, static_cast<uint64_t>(32));
  CONFIG_HOT_UPDATED_ITEM(large_file_chunks, static_cast<uint64_t>(128));

  CONFIG_HOT_UPDATED_ITEM(recursive_perm_check, true);

  // Worker pool defaults: 8 coroutines, queue of 1024 entries.
  CONFIG_OBJ(workers, PriorityCoroutinePoolConfig, [](auto &pool) {
    pool.set_coroutines_num(8);
    pool.set_queue_size(1024);
  });
  // Retry defaults for chunk removal: 10s initial wait, 10s max wait, 30s max retry time.
  CONFIG_OBJ(retry_remove_chunks, storage::client::RetryOptions, [](auto &retry) {
    retry.set_init_wait_time(10_s);
    retry.set_max_wait_time(10_s);
    retry.set_max_retry_time(30_s);
  });
};
|
||||
|
||||
/// Top-level configuration of the meta server.
/// Scalar items are hot-updatable; nested objects carry the configs of individual components.
/// The declaration order is kept as-is.
struct Config : ConfigBase<Config> {
  CONFIG_HOT_UPDATED_ITEM(readonly, false);
  CONFIG_HOT_UPDATED_ITEM(authenticate, false);
  CONFIG_HOT_UPDATED_ITEM(grv_cache, false);

  // Nested component configurations.
  CONFIG_OBJ(gc, GcConfig);
  CONFIG_OBJ(session_manager, SessionManager::Config);
  CONFIG_OBJ(distributor, Distributor::Config);
  CONFIG_OBJ(forward, Forward::Config);
  CONFIG_OBJ(event_trace_log, analytics::StructuredTraceLog<MetaEventTrace>::Config);

  // Items with ConfigCheckers::checkPositive are validated by that checker.
  CONFIG_HOT_UPDATED_ITEM(max_symlink_depth, 4L, ConfigCheckers::checkPositive);
  CONFIG_HOT_UPDATED_ITEM(max_symlink_count, 10L, ConfigCheckers::checkPositive);
  CONFIG_HOT_UPDATED_ITEM(max_directory_depth, 64L, ConfigCheckers::checkPositive);
  CONFIG_HOT_UPDATED_ITEM(acl_cache_time, 15_s);
  CONFIG_HOT_UPDATED_ITEM(list_default_limit, 128);
  CONFIG_HOT_UPDATED_ITEM(sync_on_prune_session, false);
  CONFIG_HOT_UPDATED_ITEM(max_remove_chunks_per_request, 32u, ConfigCheckers::checkPositive);
  CONFIG_HOT_UPDATED_ITEM(allow_stat_deleted_inodes, true);
  CONFIG_HOT_UPDATED_ITEM(ignore_length_hint, false);
  CONFIG_HOT_UPDATED_ITEM(time_granularity, 1_s);
  CONFIG_HOT_UPDATED_ITEM(dynamic_stripe, false);
  CONFIG_HOT_UPDATED_ITEM(dynamic_stripe_initial, 16u, ConfigCheckers::checkPositive);
  CONFIG_HOT_UPDATED_ITEM(dynamic_stripe_growth, 2u);
  CONFIG_HOT_UPDATED_ITEM(batch_stat_concurrent, 8u);
  CONFIG_HOT_UPDATED_ITEM(batch_stat_by_path_concurrent, 4u);
  CONFIG_HOT_UPDATED_ITEM(max_batch_operations, 4096u);
  CONFIG_HOT_UPDATED_ITEM(enable_new_chunk_engine, false);
  CONFIG_HOT_UPDATED_ITEM(allow_owner_change_immutable, false);

  // deprecated
  CONFIG_HOT_UPDATED_ITEM(check_file_hole, false);
  CONFIG_OBJ(background_hole_checker, CoroutinesPoolBase::Config, [](CoroutinesPoolBase::Config &pool) {
    pool.set_coroutines_num(16);
    pool.set_queue_size(4096);
  });

  CONFIG_HOT_UPDATED_ITEM(inodeId_check_unique, true);
  CONFIG_HOT_UPDATED_ITEM(inodeId_abort_on_duplicate, false);

  // replace file with new inode on O_TRUNC
  CONFIG_HOT_UPDATED_ITEM(otrunc_replace_file, true);
  CONFIG_HOT_UPDATED_ITEM(otrunc_replace_file_threshold, 1_GB);

  // statfs
  CONFIG_HOT_UPDATED_ITEM(statfs_cache_time, 60_s);
  CONFIG_HOT_UPDATED_ITEM(statfs_update_interval, 5_s);
  CONFIG_HOT_UPDATED_ITEM(statfs_space_imbalance_threshold, 5);

  // iflags
  CONFIG_HOT_UPDATED_ITEM(iflags_chain_allocation, false);
  CONFIG_HOT_UPDATED_ITEM(iflags_chunk_engine, true);

  // recursive remove
  CONFIG_HOT_UPDATED_ITEM(recursive_remove_check_owner, true);
  CONFIG_HOT_UPDATED_ITEM(recursive_remove_perm_check, static_cast<size_t>(1024));
  CONFIG_HOT_UPDATED_ITEM(allow_directly_move_to_trash, false);

  // idempotent operation
  CONFIG_HOT_UPDATED_ITEM(idempotent_record_expire, 30_min);
  CONFIG_HOT_UPDATED_ITEM(idempotent_record_clean, 1_min);
  CONFIG_HOT_UPDATED_ITEM(idempotent_remove, true);
  CONFIG_HOT_UPDATED_ITEM(idempotent_rename, false);

  CONFIG_HOT_UPDATED_ITEM(operation_timeout, 5_s);

  CONFIG_OBJ(retry_transaction, TransactionRetry);
  // Retry defaults for chunk removal: 10s initial wait, 10s max wait, 30s max retry time.
  CONFIG_OBJ(retry_remove_chunks, storage::client::RetryOptions, [](auto &retry) {
    retry.set_init_wait_time(10_s);
    retry.set_max_wait_time(10_s);
    retry.set_max_retry_time(30_s);
  });

  CONFIG_OBJ(user_cache, core::UserCache::Config);
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
79
src/meta/components/AclCache.h
Normal file
79
src/meta/components/AclCache.h
Normal file
@@ -0,0 +1,79 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <folly/Random.h>
|
||||
#include <folly/Synchronized.h>
|
||||
#include <folly/container/EvictingCacheMap.h>
|
||||
#include <optional>
|
||||
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class AclCache {
|
||||
public:
|
||||
AclCache(size_t cacheSize) {
|
||||
for (size_t i = 0; i < kNumShards; i++) {
|
||||
shardedMaps_.emplace_back(
|
||||
folly::EvictingCacheMap<InodeId, CacheEntry>(std::max(cacheSize / kNumShards, 1ul << 10), 128));
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<Acl> get(InodeId inode, Duration ttl) {
|
||||
static monitor::CountRecorder hit("meta_server.aclcache_hit");
|
||||
static monitor::CountRecorder miss("meta_server.aclcache_miss");
|
||||
|
||||
if (ttl.count() == 0) {
|
||||
return std::nullopt;
|
||||
}
|
||||
std::optional<CacheEntry> cached;
|
||||
{
|
||||
auto &shard = getShard(inode);
|
||||
auto guard = shard.lock();
|
||||
auto iter = guard->find(inode);
|
||||
if (iter != guard->end()) {
|
||||
cached = iter->second;
|
||||
}
|
||||
}
|
||||
|
||||
if (!cached.has_value()) {
|
||||
miss.addSample(1);
|
||||
return std::nullopt;
|
||||
}
|
||||
auto deadline = cached->timestamp + ttl * folly::Random::randDouble(0.8, 1.0);
|
||||
if (deadline < SteadyClock::now()) {
|
||||
miss.addSample(1);
|
||||
return std::nullopt;
|
||||
}
|
||||
hit.addSample(1);
|
||||
return cached->acl;
|
||||
}
|
||||
|
||||
void set(InodeId inode, Acl acl) {
|
||||
auto &shard = getShard(inode);
|
||||
shard.lock()->set(inode, {SteadyClock::now(), acl});
|
||||
}
|
||||
|
||||
void invalid(InodeId inode) { getShard(inode).lock()->erase(inode); }
|
||||
|
||||
private:
|
||||
static constexpr auto kNumShards = 32u;
|
||||
|
||||
struct CacheEntry {
|
||||
SteadyTime timestamp;
|
||||
Acl acl;
|
||||
};
|
||||
using CacheMap = folly::Synchronized<folly::EvictingCacheMap<InodeId, CacheEntry>, std::mutex>;
|
||||
|
||||
CacheMap &getShard(InodeId inode) {
|
||||
auto shardId = inode.u64() % kNumShards;
|
||||
return shardedMaps_[shardId];
|
||||
}
|
||||
|
||||
std::vector<CacheMap> shardedMaps_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
131
src/meta/components/ChainAllocator.h
Normal file
131
src/meta/components/ChainAllocator.h
Normal file
@@ -0,0 +1,131 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/Random.h>
|
||||
#include <folly/Synchronized.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "client/mgmtd/ICommonMgmtdClient.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "fbs/mgmtd/MgmtdTypes.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class ChainAllocator {
|
||||
public:
|
||||
ChainAllocator(std::shared_ptr<client::ICommonMgmtdClient> mgmtdClient)
|
||||
: mgmtdClient_(std::move(mgmtdClient)) {}
|
||||
|
||||
CoTryTask<void> checkLayoutValid(const Layout &layout) {
|
||||
CO_RETURN_ON_ERROR(layout.valid(true));
|
||||
|
||||
if (!layout.empty()) {
|
||||
auto routing = getRoutingInfo();
|
||||
|
||||
const auto &chains = layout.getChainIndexList();
|
||||
for (auto index : chains) {
|
||||
auto ref = flat::ChainRef{layout.tableId, layout.tableVersion, index};
|
||||
if (auto chain = routing->getChain(ref); !chain) {
|
||||
XLOGF(ERR, "Layout contains a not found ChainRef {}", ref);
|
||||
co_return makeError(MetaCode::kInvalidFileLayout, fmt::format("{} not found", ref));
|
||||
} else if (chain->targets.empty()) {
|
||||
XLOGF(ERR, "Chain {} has no target", chain->chainId);
|
||||
co_return makeError(MetaCode::kInvalidFileLayout, fmt::format("Chain {} has no target", chain->chainId));
|
||||
}
|
||||
}
|
||||
}
|
||||
co_return Void{};
|
||||
}
|
||||
|
||||
CoTryTask<void> allocateChainsForLayout(Layout &layout) {
|
||||
co_return co_await allocateChainsForLayout(layout, [&](size_t chainCnt) {
|
||||
auto tableId = layout.tableId;
|
||||
auto stripeSize = layout.stripeSize;
|
||||
auto key = AllocType(tableId, stripeSize);
|
||||
auto guard = roundRobin_.lock();
|
||||
auto iter = guard->find(key);
|
||||
if (iter == guard->end()) {
|
||||
// start with random value
|
||||
auto initial = folly::Random::rand32(chainCnt) / stripeSize * stripeSize;
|
||||
iter = guard->insert({key, initial}).first;
|
||||
}
|
||||
auto res = (iter->second % chainCnt) + 1;
|
||||
iter->second = (iter->second + stripeSize) % chainCnt;
|
||||
return res;
|
||||
});
|
||||
}
|
||||
|
||||
CoTryTask<void> allocateChainsForLayout(Layout &layout, folly::Synchronized<uint32_t> &chainAllocCounter) {
|
||||
co_return co_await allocateChainsForLayout(layout, [&](size_t chainCnt) {
|
||||
auto guard = chainAllocCounter.wlock();
|
||||
auto stripeSize = layout.stripeSize;
|
||||
if (*guard == (uint32_t)-1) {
|
||||
// start with random value
|
||||
*guard = folly::Random::rand32(chainCnt) / stripeSize * stripeSize;
|
||||
}
|
||||
// add and return.
|
||||
auto res = (*guard % chainCnt) + 1;
|
||||
*guard = (*guard + stripeSize) % chainCnt;
|
||||
return res;
|
||||
});
|
||||
}
|
||||
|
||||
CoTryTask<void> allocateChainsForLayout(Layout &layout, auto &&roundRobin) {
|
||||
CO_RETURN_ON_ERROR(co_await checkLayoutValid(layout));
|
||||
if (!layout.empty()) {
|
||||
co_return Void{};
|
||||
}
|
||||
|
||||
auto tableId = layout.tableId;
|
||||
auto tableVersion = layout.tableVersion;
|
||||
|
||||
auto routing = getRoutingInfo();
|
||||
const auto *table = routing->raw()->getChainTable(tableId, tableVersion);
|
||||
if (!table) {
|
||||
XLOGF(ERR, "Failed to find ChainTable with {} and {}", tableId, tableVersion);
|
||||
co_return makeError(MetaCode::kInvalidFileLayout,
|
||||
fmt::format("ChainTable with {} and {} not found", tableId, tableVersion));
|
||||
} else if (!table->chainTableVersion) {
|
||||
XLOGF(ERR, "Invalid table {} version {}", tableId, tableVersion);
|
||||
co_return makeError(MetaCode::kInvalidFileLayout,
|
||||
fmt::format("Invalid chain table {} version {}", tableId, tableVersion));
|
||||
}
|
||||
auto chainCnt = table->chains.size();
|
||||
if (chainCnt < layout.stripeSize || chainCnt == 0) {
|
||||
XLOGF(ERR,
|
||||
"Failed to allocate for layout {}, chain table {} have only {} chains.",
|
||||
layout,
|
||||
tableId.toUnderType(),
|
||||
chainCnt);
|
||||
co_return makeError(
|
||||
MetaCode::kInvalidFileLayout,
|
||||
fmt::format("try to allocate {} chains from {}, found {}", layout.stripeSize, tableId, chainCnt));
|
||||
}
|
||||
auto chainBegin = roundRobin(chainCnt);
|
||||
layout.tableVersion = table->chainTableVersion;
|
||||
layout.chains = Layout::ChainRange(chainBegin, Layout::ChainRange::STD_SHUFFLE_MT19937, folly::Random::rand64());
|
||||
if (auto valid = layout.valid(false); valid.hasError()) {
|
||||
XLOGF(DFATAL, "Layout is not valid after alloc {}, error {}", layout, valid.error());
|
||||
CO_RETURN_ERROR(valid);
|
||||
}
|
||||
|
||||
co_return Void{};
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<client::RoutingInfo> getRoutingInfo() { return mgmtdClient_->getRoutingInfo(); }
|
||||
|
||||
using AllocType = std::pair<flat::ChainTableId, size_t>;
|
||||
folly::Synchronized<std::map<AllocType, uint32_t>, std::mutex> roundRobin_;
|
||||
std::shared_ptr<client::ICommonMgmtdClient> mgmtdClient_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
321
src/meta/components/Distributor.cc
Normal file
321
src/meta/components/Distributor.cc
Normal file
@@ -0,0 +1,321 @@
|
||||
#include "Distributor.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <folly/Random.h>
|
||||
#include <folly/experimental/coro/BlockingWait.h>
|
||||
#include <folly/functional/Partial.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include "common/app/NodeId.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/KeyPrefix.h"
|
||||
#include "common/kv/WithTransaction.h"
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/BackgroundRunner.h"
|
||||
#include "common/utils/CPUExecutorGroup.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/MurmurHash3.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Utils.h"
|
||||
#include "fdb/FDBRetryStrategy.h"
|
||||
#include "fdb/FDBTransaction.h"
|
||||
#include "fmt/core.h"
|
||||
#include "fmt/format.h"
|
||||
|
||||
// Renders the bytes of `key` (any object exposing data() and size()) as comma-separated
// uint8_t values; used as a format argument in the log statements of this file.
#define FMT_KEY(key) fmt::join((const uint8_t *)(key).data(), (const uint8_t *)(key).data() + (key).size(), ",")
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
namespace {
|
||||
// Counter sampled once each time Distributor::updateServerMap() runs.
monitor::CountRecorder setMapCounter("meta_server.dist_set_map");
|
||||
} // namespace
|
||||
|
||||
/// Builds the per-server key "<kPrefix>-<node id zero-padded to 8 digits>".
std::string Distributor::PerServerKey::pack(flat::NodeId nodeId) {
  const auto rawId = nodeId.toUnderType();
  return fmt::format("{}-{:08d}", kPrefix, rawId);
}
|
||||
|
||||
/// Parses a key of the form "<kPrefix>-<number>" back into a node id.
/// @return the parsed id, or flat::NodeId(0) when the key does not scan.
flat::NodeId Distributor::PerServerKey::unpack(std::string_view key) {
  uint32_t parsed;
  const auto pattern = fmt::format("{}-{{}}", kPrefix);
  if (auto scanned = scn::scan(key, pattern, parsed); scanned) {
    return flat::NodeId(parsed);
  }
  return flat::NodeId(0);
}
|
||||
|
||||
/// Runs one blocking update() and then starts a background runner on an executor picked
/// from `exec` that repeats update(false) at a jittered interval.
void Distributor::start(CPUExecutorGroup &exec) {
  // Initial synchronous update; a failure is only logged.
  auto initial = folly::coro::blockingWait(update(false));
  XLOGF_IF(ERR, initial.hasError(), "failed to update server map on start, error {}", initial.error());

  bgRunner_ = std::make_unique<BackgroundRunner>(&exec.randomPick());
  auto runnerName = fmt::format("distributor_update@{}", nodeId_);
  bgRunner_->start(
      runnerName,
      [this]() -> CoTask<void> {
        auto outcome = co_await update(false);
        XLOGF_IF(CRITICAL, outcome.hasError(), "Distributor update failed, {}", outcome.error());
      },
      // Interval is update_interval scaled by a random factor in [0.8, 1.2).
      [this]() { return config_.update_interval() * folly::Random::randDouble(0.8, 1.2); });
}
|
||||
|
||||
void Distributor::stopAndJoin(bool updateMap) {
|
||||
XLOGF(INFO, "{} stop, update map {}", nodeId_, updateMap);
|
||||
if (bgRunner_) {
|
||||
folly::coro::blockingWait(bgRunner_->stopAll());
|
||||
bgRunner_.reset();
|
||||
}
|
||||
if (updateMap) {
|
||||
XLOGF(INFO, "{} update map on stop", nodeId_);
|
||||
auto result = folly::coro::blockingWait(update(true));
|
||||
XLOGF_IF(ERR, result.hasError(), "failed to update server map on stop, error {}", result.error());
|
||||
}
|
||||
XLOGF(INFO, "{} stopped", nodeId_);
|
||||
}
|
||||
|
||||
/// Selects the server for `inodeId` from the locally cached active server list.
flat::NodeId Distributor::getServer(InodeId inodeId) {
  // The read lock is held for the duration of the selection.
  return latest_.withRLock([&](const auto &latest) { return Weight::select(latest.active, inodeId); });
}
|
||||
|
||||
/// Convenience overload: checks whether `inodeId` maps to this node (nodeId_).
CoTryTask<std::pair<bool, kv::Versionstamp>> Distributor::checkOnServer(kv::IReadWriteTransaction &txn,
                                                                        InodeId inodeId) {
  auto self = nodeId_;
  co_return co_await checkOnServer(txn, inodeId, self);
}
|
||||
|
||||
/// Checks, within `txn`, whether `inodeId` is currently assigned to `nodeId`.
/// Reads the version through the transaction; when it is newer than the cached map the
/// map is reloaded first. If the cached map is newer than what the transaction sees,
/// fails with TransactionCode::kTooOld.
/// @return (server selected for inodeId == nodeId, versionstamp seen by the transaction).
CoTryTask<std::pair<bool, kv::Versionstamp>> Distributor::checkOnServer(kv::IReadWriteTransaction &txn,
                                                                        InodeId inodeId,
                                                                        flat::NodeId nodeId) {
  auto txnVersion = co_await loadVersion(txn);
  CO_RETURN_ON_ERROR(txnVersion);

  auto guard = latest_.rlock();
  if (*txnVersion > guard->versionstamp) {
    // Never hold the lock across a suspension point: release, reload, re-acquire.
    guard.unlock();
    CO_RETURN_ON_ERROR(co_await loadServerMap(txn, false));
    guard = latest_.rlock();
  }

  // After a reload the cached map must be at least as new as the transaction's view.
  XLOGF_IF(FATAL, *txnVersion > guard->versionstamp, "{} > {}", FMT_KEY(*txnVersion), FMT_KEY(guard->versionstamp));
  if (*txnVersion < guard->versionstamp) {
    XLOGF(WARN, "version {} < {}, need retry", FMT_KEY(*txnVersion), FMT_KEY(guard->versionstamp));
    co_return makeError(TransactionCode::kTooOld, "distributor versionstamp changed");
  }

  const bool onServer = Weight::select(guard->active, inodeId) == nodeId;
  co_return std::pair(onServer, *txnVersion);
}
|
||||
|
||||
/// Reads kv::kMetadataVersionKey through `txn` and returns it as a Versionstamp.
/// A missing key yields an all-zero versionstamp; a stored value whose size differs
/// from the versionstamp size triggers a FATAL log.
CoTryTask<kv::Versionstamp> Distributor::loadVersion(kv::IReadOnlyTransaction &txn) {
  co_return (co_await txn.get(kv::kMetadataVersionKey)).then([](auto &stored) {
    auto version = kv::Versionstamp{0};
    if (stored.has_value()) {
      XLOGF_IF(FATAL,
               stored->size() != version.size(),
               "kMetadataVersionKey -> value {}, size not match",
               FMT_KEY(*stored));
      memcpy(version.data(), stored->data(), version.size());
    }
    return version;
  });
}
|
||||
|
||||
/// Writes kv::kMetadataVersionKey as a versionstamped value: a zero-filled buffer of
/// sizeof(kv::Versionstamp) bytes with offset 0.
CoTryTask<void> Distributor::updateVersion(kv::IReadWriteTransaction &txn) {
  std::array<char, sizeof(kv::Versionstamp)> placeholder{0};
  auto outcome =
      co_await txn.setVersionstampedValue(kv::kMetadataVersionKey, {placeholder.data(), placeholder.size()}, 0);
  co_return outcome;
}
|
||||
|
||||
/// Loads the version and the serialized server map through `txn`, and replaces the
/// cached map (latest_) when the loaded versionstamp is newer.
/// @param update only echoed in the log line emitted when the cache is replaced.
/// @return the loaded map with its versionstamp; MetaCode::kInconsistent when the stored
///         map cannot be deserialized.
CoTryTask<Distributor::LatestServerMap> Distributor::loadServerMap(kv::IReadOnlyTransaction &txn, bool update) {
  auto versionstamp = co_await loadVersion(txn);
  CO_RETURN_ON_ERROR(versionstamp);

  auto stored = co_await txn.get(kMapKey);
  CO_RETURN_ON_ERROR(stored);
  const bool found = stored->has_value();
  // Once this node has completed an update round the map is expected to exist.
  XLOGF_IF(DFATAL,
           (!found && updated_ != 0),
           "{} distributor server map not found, shouldn't happen",
           nodeId_);

  ServerMap map;
  if (!found) {
    XLOGF(INFO, "server map not found");
  } else if (auto des = serde::deserialize(map, **stored); des.hasError()) {
    XLOGF(DFATAL, "Failed to deserializa server map, {}", des.error());
    co_return makeError(MetaCode::kInconsistent, "Invalid distributor server map");
  }

  // Cheap check under the read lock first, then re-check under the write lock.
  if (*versionstamp > latest_.rlock()->versionstamp) {
    auto wlock = latest_.wlock();
    if (*versionstamp > wlock->versionstamp) {
      XLOGF(INFO,
            "{} get new server map: {}, versionstamp: {}, update {}",
            nodeId_,
            map,
            FMT_KEY(*versionstamp),
            update);
      *wlock = {map, *versionstamp};
    }
  }

  co_return LatestServerMap{map, *versionstamp};
}
|
||||
|
||||
/// Serializes `map` into kMapKey and bumps the metadata version key in the same
/// transaction; also samples setMapCounter.
CoTryTask<void> Distributor::updateServerMap(kv::IReadWriteTransaction &txn, const Distributor::ServerMap &map) {
  XLOGF(INFO, "{} try set new server map: {}", nodeId_, map);
  setMapCounter.addSample(1);

  auto serialized = serde::serialize(map);
  CO_RETURN_ON_ERROR(co_await txn.set(kMapKey, serialized));
  CO_RETURN_ON_ERROR(co_await updateVersion(txn));
  co_return Void{};
}
|
||||
|
||||
/// Runs update(txn, exit) inside a retried FDB transaction, up to 10 rounds.
/// A round that wrote a new map is followed by another round so the new map is loaded;
/// the loop ends as soon as a round writes nothing or `exit` is set.
/// @return Void on success, the transaction error on failure, or MetaCode::kBusy when
///         all rounds produced a new map.
CoTryTask<void> Distributor::update(bool exit) {
  constexpr size_t kMaxRounds = 10;
  for (size_t round = 0; round < kMaxRounds; ++round) {
    auto strategy = kv::FDBRetryStrategy();
    auto result = co_await kv::WithTransaction<kv::FDBRetryStrategy>(strategy).run(
        kvEngine_->createReadWriteTransaction(),
        [&](kv::IReadWriteTransaction &txn) -> CoTryTask<bool> { co_return co_await update(txn, exit); });
    XLOGF_IF(ERR, result.hasError(), "{} update failed, error {}", nodeId_, result.error());
    CO_RETURN_ON_ERROR(result);

    // if update generate a new map, we need update again to load it.
    const auto wroteNewMap = *result;
    XLOGF(INFO, "{} updated map, new {}, exit {}", nodeId_, wroteNewMap, exit);
    if (exit || !wroteNewMap) {
      co_return Void{};
    }

    updated_.fetch_add(1);
  }

  XLOGF(CRITICAL, "{} update not finished after too many times", nodeId_);
  co_return makeError(MetaCode::kBusy, "update not finished after too many times");
}
|
||||
|
||||
/// One update round inside `txn`:
///  1. load the server map and check it against the cached copy;
///  2. list all per-server keys and refresh the locally tracked last-seen time of every
///     other server whose stored value changed;
///  3. collect active servers that have not changed within config_.timeout() as dead;
///  4. write this node's own per-server key as a versionstamped value;
///  5. if this node is missing from the map, some server is dead, or this node is
///     exiting, write a new server map.
/// @return true when a new map was written, false otherwise.
CoTryTask<bool> Distributor::update(kv::IReadWriteTransaction &txn, bool exit) {
  XLOGF(INFO, "{} update, exit {}", nodeId_, exit);
  auto current = co_await loadServerMap(txn, true);
  CO_RETURN_ON_ERROR(current);
  auto startCheck = SteadyClock::now();

  {
    auto rlock = latest_.rlock();
    XLOGF_IF(FATAL,
             current->versionstamp > rlock->versionstamp,
             "{} > {}",
             FMT_KEY(current->versionstamp),
             FMT_KEY(rlock->versionstamp));
    if (current->versionstamp < rlock->versionstamp) {
      XLOGF(WARN, "version {} < {}, need retry", FMT_KEY(current->versionstamp), FMT_KEY(rlock->versionstamp));
      co_return makeError(TransactionCode::kTooOld, "distributor versionstamp changed");
    }
    // Same versionstamp: the loaded map is expected to match the cached one.
    XLOGF_IF(DFATAL,
             current->active != rlock->active,
             "versionstamp {}, {} != {}",
             FMT_KEY(current->versionstamp),
             fmt::join(current->active.begin(), current->active.end(), ","),
             fmt::join(rlock->active.begin(), rlock->active.end(), ","));
  }

  auto opts = kv::TransactionHelper::ListByPrefixOptions().withSnapshot(true).withInclusive(false).withLimit(0);
  auto result = co_await kv::TransactionHelper::listByPrefix(txn, fmt::format("{}-", kPrefix), opts);
  CO_RETURN_ON_ERROR(result);

  std::set<flat::NodeId> dead;
  servers_.withWLock([&](auto &servers) {
    bool self = false;
    for (auto &[key, versionstamp] : *result) {
      auto nodeId = PerServerKey::unpack(key);
      if (!nodeId) {
        XLOGF(DFATAL, "Failed to unpack key {}", key);
        continue;
      }
      if (nodeId == nodeId_) {
        self = true;
        continue;
      }
      // A changed stored value means the server wrote its key since the last observation.
      const bool changed = !servers.contains(nodeId) || servers[nodeId].versionstamp != versionstamp;
      if (changed) {
        XLOGF(INFO,
              "{} found {} alive, prev {}, curr {}",
              nodeId_,
              nodeId,
              FMT_KEY(servers[nodeId].versionstamp),
              FMT_KEY(versionstamp));
        servers[nodeId] = {versionstamp, SteadyClock::now()};
      }
    }
    XLOGF_IF(DFATAL, (updated_ != 0 && !self), "self {} not found!!!", nodeId_);

    auto timeout = config_.timeout();
    for (auto nodeId : current->active) {
      auto lastSeen = servers[nodeId].lastUpdate;
      if (nodeId != nodeId_ && lastSeen + timeout < startCheck) {
        XLOGF(CRITICAL, "{} mark {} as dead, not update in {}", nodeId_, nodeId, timeout);
        dead.emplace(nodeId);
      }
    }
  });

  // Write this node's own per-server key.
  auto key = PerServerKey::pack(nodeId_);
  std::array<char, sizeof(kv::Versionstamp)> buf{0};
  CO_RETURN_ON_ERROR(co_await txn.setVersionstampedValue(key, {buf.data(), buf.size()}, 0));

  bool update = false;
  if (!exit && std::find(current->active.begin(), current->active.end(), nodeId_) == current->active.end()) {
    XLOGF(INFO, "{} not in server map, create a new map", nodeId_);
    update = true;
  }
  if (!dead.empty()) {
    XLOGF(INFO, "{} found dead servers {}, create a new map", nodeId_, fmt::join(dead.begin(), dead.end(), ","));
    update = true;
  }
  if (exit) {
    XLOGF(INFO, "{} exiting, create a new map", nodeId_);
    dead.insert(nodeId_);
    update = true;
  }

  if (!update) {
    co_return false;
  }

  // New map = current active servers minus the dead ones, plus this node unless exiting.
  std::set<flat::NodeId> active;
  for (auto node : current->active) {
    if (dead.contains(node)) {
      continue;
    }
    active.insert(node);
  }
  if (!exit) {
    active.insert(nodeId_);
  }
  ServerMap map{std::vector<flat::NodeId>(active.begin(), active.end())};
  CO_RETURN_ON_ERROR(co_await updateServerMap(txn, map));
  co_return true;
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
98
src/meta/components/Distributor.h
Normal file
98
src/meta/components/Distributor.h
Normal file
@@ -0,0 +1,98 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <folly/Synchronized.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <regex.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "common/app/NodeId.h"
|
||||
#include "common/kv/IKVEngine.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/KeyPrefix.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/BackgroundRunner.h"
|
||||
#include "common/utils/CPUExecutorGroup.h"
|
||||
#include "common/utils/ConfigBase.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "meta/store/Inode.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class Distributor {
|
||||
public:
|
||||
struct Config : ConfigBase<Config> {
|
||||
CONFIG_HOT_UPDATED_ITEM(update_interval, 1_s);
|
||||
CONFIG_HOT_UPDATED_ITEM(timeout, 30_s);
|
||||
};
|
||||
|
||||
Distributor(const Config &config, flat::NodeId nodeId, std::shared_ptr<kv::IKVEngine> kvEngine)
|
||||
: config_(config),
|
||||
nodeId_(nodeId),
|
||||
kvEngine_(kvEngine) {
|
||||
XLOGF_IF(FATAL, !nodeId_, "invalid node id {}", nodeId_);
|
||||
}
|
||||
|
||||
~Distributor() { stopAndJoin(); }
|
||||
|
||||
flat::NodeId nodeId() const { return nodeId_; }
|
||||
|
||||
void start(CPUExecutorGroup &exec);
|
||||
void stopAndJoin(bool updateMap = true);
|
||||
|
||||
flat::NodeId getServer(InodeId inodeId);
|
||||
|
||||
CoTryTask<std::pair<bool, kv::Versionstamp>> checkOnServer(kv::IReadWriteTransaction &txn, InodeId inodeId);
|
||||
CoTryTask<std::pair<bool, kv::Versionstamp>> checkOnServer(kv::IReadWriteTransaction &txn,
|
||||
InodeId inodeId,
|
||||
flat::NodeId nodeId);
|
||||
|
||||
private:
|
||||
static constexpr auto kPrefix = kv::toStr(kv::KeyPrefix::MetaDistributor);
|
||||
static constexpr auto kMapKey = kPrefix;
|
||||
|
||||
struct ServerMap {
|
||||
SERDE_STRUCT_FIELD(active, std::vector<flat::NodeId>());
|
||||
};
|
||||
|
||||
struct LatestServerMap : ServerMap {
|
||||
kv::Versionstamp versionstamp{0};
|
||||
};
|
||||
|
||||
struct ServerStatus {
|
||||
std::string versionstamp;
|
||||
SteadyTime lastUpdate;
|
||||
};
|
||||
|
||||
struct PerServerKey {
|
||||
static std::string pack(flat::NodeId nodeId);
|
||||
static flat::NodeId unpack(std::string_view key);
|
||||
};
|
||||
|
||||
CoTryTask<kv::Versionstamp> loadVersion(kv::IReadOnlyTransaction &txn);
|
||||
CoTryTask<LatestServerMap> loadServerMap(kv::IReadOnlyTransaction &txn, bool update);
|
||||
|
||||
CoTryTask<void> updateVersion(kv::IReadWriteTransaction &txn);
|
||||
CoTryTask<void> updateServerMap(kv::IReadWriteTransaction &txn, const ServerMap &map);
|
||||
|
||||
CoTryTask<void> update(bool exit);
|
||||
CoTryTask<bool> update(kv::IReadWriteTransaction &txn, bool exit);
|
||||
|
||||
const Config config_;
|
||||
flat::NodeId nodeId_;
|
||||
std::atomic<size_t> updated_{0};
|
||||
std::shared_ptr<kv::IKVEngine> kvEngine_;
|
||||
std::unique_ptr<BackgroundRunner> bgRunner_;
|
||||
|
||||
folly::Synchronized<LatestServerMap> latest_;
|
||||
folly::Synchronized<std::map<flat::NodeId, ServerStatus>> servers_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
246
src/meta/components/FileHelper.cc
Normal file
246
src/meta/components/FileHelper.cc
Normal file
@@ -0,0 +1,246 @@
|
||||
#include "meta/components/FileHelper.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/Math.h>
|
||||
#include <folly/experimental/coro/Collect.h>
|
||||
#include <folly/futures/Future.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <linux/fs.h>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "client/storage/StorageClient.h"
|
||||
#include "common/app/NodeId.h"
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/FileOperation.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
|
||||
// Declares `routingInfo` (from mgmtdClient_) and `rawRoutingInfo` (its raw() view) in the
// calling coroutine. When either is missing, logs and co_returns
// MgmtdClientCode::kRoutingInfoNotReady from the enclosing coroutine.
#define GET_RAW_ROUTING_INFO()                                  \
  auto routingInfo = mgmtdClient_->getRoutingInfo();            \
  if (!(routingInfo && routingInfo->raw())) {                   \
    XLOGF(ERR, "RoutingInfo not ready");                        \
    co_return makeError(MgmtdClientCode::kRoutingInfoNotReady); \
  }                                                             \
  auto rawRoutingInfo = routingInfo->raw()
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
namespace {
|
||||
|
||||
// File-local FileOperation recorder constructed with the "meta_server" name; passed to
// the FileOperation instances created in this file.
FileOperation::Recorder recorder("meta_server");
|
||||
|
||||
template <typename T>
|
||||
constexpr folly::ordering compare(const T &a, const T &b) {
|
||||
return a < b ? folly::ordering::lt : a > b ? folly::ordering::gt : folly::ordering::eq;
|
||||
}
|
||||
|
||||
/// Finds the entries with the smallest and largest mapped value (`->second`).
/// On ties the first entry in iteration order is kept for both min and max.
/// @param map any container of pair-like elements exposing begin()/end().
/// @return a pair (minIter, maxIter); both equal map.end() when the container is empty.
template <typename Map>
auto findMinMax(Map &&map) {
  auto min = map.begin();
  auto max = min;
  // An empty container has no begin() to advance from; report (end, end) instead.
  if (min == map.end()) {
    return std::make_pair(min, max);
  }
  for (auto iter = std::next(min); iter != map.end(); ++iter) {
    if (min->second > iter->second) {
      min = iter;
    }
    if (max->second < iter->second) {
      max = iter;
    }
  }
  return std::make_pair(min, max);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Creates the background runner on one executor picked from `exec` and registers
// the "statFs" job, which refreshes the cached filesystem status when needed.
void FileHelper::start(CPUExecutorGroup &exec) {
  bgRunner_ = std::make_unique<BackgroundRunner>(&exec.pickNext());
  bgRunner_->start(
      "statFs",
      [&]() -> CoTask<void> {
        auto snapshot = *cachedFsStatus_.rlock();
        // The cache is fresh only if it holds a value that is younger than both
        // the update interval and the cache lifetime.
        const bool fresh = snapshot.status_.has_value() &&
                           RelativeTime::now() - snapshot.update_ < config_.statfs_update_interval() &&
                           RelativeTime::now() - snapshot.update_ < config_.statfs_cache_time();
        if (!fresh) {
          co_await updateStatFs();
        }
      },
      // presumably the period between two runs of the job; confirm against BackgroundRunner
      []() { return 200_ms; });
}
|
||||
|
||||
void FileHelper::stopAndJoin() {
|
||||
if (bgRunner_) {
|
||||
folly::coro::blockingWait(bgRunner_->stopAll());
|
||||
bgRunner_.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Queries the chunks of `inode` through FileOperation and returns the length
// reported by the query.
//
// When `hasHole` is non-null, *hasHole is set to true if the reported length
// differs from the total chunk length, or if the chunk count derived from the
// length (length / chunkSize, rounded up) differs from the total number of
// chunks reported. Errors from the routing-info lookup or from the query are
// returned unchanged.
CoTryTask<uint64_t> FileHelper::queryLength(const UserInfo &userInfo, const Inode &inode, bool *hasHole) {
  GET_RAW_ROUTING_INFO();
  FileOperation fop(*storageClient_, *rawRoutingInfo, userInfo, inode, recorder);
  const bool wantHoleCheck = hasHole != nullptr;
  auto queryResult = co_await fop.queryChunks(wantHoleCheck, config_.dynamic_stripe());
  CO_RETURN_ON_ERROR(queryResult);

  if (wantHoleCheck) {
    auto chunkNum = folly::divCeil(queryResult->length, inode.asFile().layout.chunkSize.u64());
    const bool lengthMismatch = queryResult->length != queryResult->totalChunkLen;
    const bool countMismatch = chunkNum != queryResult->totalNumChunks;
    *hasHole = lengthMismatch || countMismatch;
    if (*hasHole) {
      XLOGF(ERR,
            "FileHelper found hole in file {}, length {}, chunk num {}, total chunk length {}, total chunk num {}",
            inode.id,
            queryResult->length,
            chunkNum,
            queryResult->totalChunkLen,
            queryResult->totalNumChunks);
    } else {
      XLOGF(DBG,
            "FileHelper check hole for file {}, length {}, chunk num {}, total chunk length {}, total chunk num {}",
            inode.id,
            queryResult->length,
            chunkNum,
            queryResult->totalChunkLen,
            queryResult->totalNumChunks);
    }
  }
  XLOGF(DBG, "FileHelper query length for {}, {}.", inode.id, queryResult->length);

  co_return queryResult->length;
}
|
||||
|
||||
// Removes the chunks of `inode` in batches of `removeChunksBatchSize` until the
// storage layer reports that nothing is left, and returns the total number of
// chunks removed.
//
// An inode carrying FS_IMMUTABLE_FL is rejected with MetaCode::kFoundBug (and a
// DFATAL log). The first failing batch aborts the loop and its error is returned.
CoTryTask<size_t> FileHelper::remove(const UserInfo &userInfo,
                                     const Inode &inode,
                                     RetryOptions retry,
                                     uint32_t removeChunksBatchSize) {
  if (inode.acl.iflags & FS_IMMUTABLE_FL) {
    auto msg = fmt::format("try remove file {} with FS_IMMUTABLE_FL", inode.id);
    XLOG(DFATAL, msg);
    co_return makeError(MetaCode::kFoundBug, msg);
  }

  GET_RAW_ROUTING_INFO();
  FileOperation fop(*storageClient_, *rawRoutingInfo, userInfo, inode, recorder);

  size_t total = 0;
  bool pending = true;
  while (pending) {
    auto batch = co_await fop.removeChunks(0, removeChunksBatchSize, config_.dynamic_stripe(), retry);
    CO_RETURN_ON_ERROR(batch);
    auto [removed, more] = *batch;
    total += removed;
    pending = more;
    if (pending) {
      XLOGF(DBG, "File {} has more chunks to remove after removed {} chunks", inode.id, removed);
    }
  }
  co_return total;
}
|
||||
|
||||
// Returns the cached filesystem status maintained by the background job.
//
// The cache is served only while it holds a value that is not older than
// config_.statfs_cache_time(). Otherwise the call fails: with
// MetaCode::kFoundBug when the background runner was never started, else with
// StorageClientCode::kResourceBusy so that the caller can try again.
// NOTE(review): `userInfo` and `cacheDuration` are not read by this function.
CoTryTask<FsStatus> FileHelper::statFs(const UserInfo &userInfo, std::chrono::milliseconds cacheDuration) {
  auto snapshot = *cachedFsStatus_.rlock();
  const bool usable =
      snapshot.status_.has_value() && !(RelativeTime::now() - snapshot.update_ > config_.statfs_cache_time());
  if (usable) {
    co_return *snapshot.status_;
  }

  if (UNLIKELY(!bgRunner_)) {
    XLOGF(DFATAL, "FileHelper not started");
    co_return makeError(MetaCode::kFoundBug, "FileHelper not started!");
  }
  co_return makeError(StorageClientCode::kResourceBusy, "cached statfs outdate, try again");
}
|
||||
|
||||
// Queries the space info of every active storage node, aggregates capacity and
// free space, logs a summary plus per-node / per-path imbalance figures, and
// stores the aggregated result in cachedFsStatus_.
//
// Returns MgmtdClientCode::kRoutingInfoNotReady (leaving the cache untouched)
// when routing info is not available yet; the previous code dereferenced the
// routing info without checking it. Nodes whose query fails are logged and
// left out of the totals.
CoTryTask<void> FileHelper::updateStatFs() {
  static constexpr double kTiB = 1ULL << 40;

  // Same readiness check as GET_RAW_ROUTING_INFO: this job starts running as
  // soon as start() is called, possibly before routing info has been fetched.
  auto routingInfo = mgmtdClient_->getRoutingInfo();
  if (!routingInfo || !routingInfo->raw()) {
    XLOGF(WARN, "FileHelper statFs: RoutingInfo not ready");
    co_return makeError(MgmtdClientCode::kRoutingInfoNotReady);
  }

  std::vector<folly::SemiFuture<Result<storage::SpaceInfoRsp>>> reqs;
  auto nodes =
      routingInfo->getNodeBy(flat::selectNodeByType(flat::NodeType::STORAGE) && flat::selectActiveNode());
  reqs.reserve(nodes.size());
  for (auto &node : nodes) {
    auto req = storageClient_->querySpaceInfo(storage::NodeId(node.app.nodeId)).semi();
    reqs.push_back(std::move(req));
  }

  uint64_t cap = 0;
  uint64_t free = 0;
  auto results = co_await folly::coro::collectAllRange(std::move(reqs));
  std::map<flat::NodeId, size_t> nodesFree;
  std::map<std::pair<flat::NodeId, std::string>, size_t> pathFree;
  for (size_t i = 0; i < nodes.size(); i++) {
    const auto &node = nodes.at(i);
    const auto &result = results.at(i);
    if (result.hasError()) {
      XLOGF(ERR, "FileHelper statFs: failed to querySpaceInfo of {}, error {}", node, result.error());
      continue;
    }

    const auto &rsp = *result;
    for (const auto &space : rsp.spaceInfos) {
      XLOGF(DBG,
            "FileHelper statFs: node {}, path {}, cap {:.1f}TiB, free {:.1f}TiB",
            node.app.nodeId,
            space.path,
            space.capacity / kTiB,
            space.free / kTiB);
      cap += space.capacity;
      // clamp per path so that the total free space can never exceed the total capacity
      free += std::min(space.free, space.capacity);
      nodesFree[node.app.nodeId] += space.free;
      pathFree[std::make_pair(node.app.nodeId, space.path)] = space.free;
    }
  }

  // log space info
  if (!pathFree.empty()) {
    auto status = cachedFsStatus_.rlock()->status_;
    if (status) {
      XLOGF(INFO,
            "FileHelper statFs: cap {:.1f}TiB, free {:.1f}TiB, prev free {:.1f}TiB, free diff {:.1f}TiB",
            cap / kTiB,
            free / kTiB,
            status->free / kTiB,
            (static_cast<int64_t>(free) - static_cast<int64_t>(status->free)) / kTiB);
    } else {
      XLOGF(INFO, "FileHelper statFs: cap {:.1f}TiB, free {:.1f}TiB", cap / kTiB, free / kTiB);
    }

    // pathFree is non-empty here, and every path entry also adds a node entry,
    // so nodesFree is non-empty as well.
    auto threshold = config_.statfs_space_imbalance_threshold();
    auto [minNode, maxNode] = findMinMax(nodesFree);
    auto avgNodeCap = cap / nodesFree.size();
    auto nodeDiff = static_cast<double>(maxNode->second - minNode->second) / avgNodeCap * 100.0;
    auto nodeMsg = fmt::format("{} {:.1f}TiB free, {} {:.1f}TiB free, avgNodeCap {:.1f}TiB, diff {:.3f}%",
                               minNode->first,
                               minNode->second / kTiB,
                               maxNode->first,
                               maxNode->second / kTiB,
                               avgNodeCap / kTiB,
                               nodeDiff);
    if (nodeDiff > threshold) {
      XLOGF(WARN, "FileHelper statFs: node space utilization imbalance, {}", nodeMsg);
    } else {
      XLOGF(INFO, "FileHelper statFs: {}", nodeMsg);
    }

    auto [minPath, maxPath] = findMinMax(pathFree);
    auto avgPathCap = cap / pathFree.size();
    auto pathDiff = static_cast<double>(maxPath->second - minPath->second) / avgPathCap * 100.0;
    auto pathMsg = fmt::format("{}:{} {:.1f}TiB free, {}:{} {:.1f}TiB free, avgPathCap {:.1f}TiB, diff {:.3f}%",
                               minPath->first.first,
                               minPath->first.second,
                               minPath->second / kTiB,
                               maxPath->first.first,
                               maxPath->first.second,
                               maxPath->second / kTiB,
                               avgPathCap / kTiB,
                               pathDiff);
    if (pathDiff > threshold) {
      XLOGF(WARN, "FileHelper statFs: disk space utilization imbalance, {}", pathMsg);
    } else {
      XLOGF(INFO, "FileHelper statFs: {}", pathMsg);
    }
  }

  // publish the new totals: FsStatus(capacity, used, free)
  auto guard = cachedFsStatus_.wlock();
  guard->update_ = RelativeTime::now();
  guard->status_ = FsStatus(cap, cap - free, free);

  co_return Void{};
}
|
||||
} // namespace hf3fs::meta::server
|
||||
73
src/meta/components/FileHelper.h
Normal file
73
src/meta/components/FileHelper.h
Normal file
@@ -0,0 +1,73 @@
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/SharedMutex.h>
|
||||
#include <folly/Synchronized.h>
|
||||
#include <folly/experimental/coro/BlockingWait.h>
|
||||
#include <folly/experimental/coro/Mutex.h>
|
||||
#include <folly/experimental/coro/SharedMutex.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
|
||||
#include "client/mgmtd/ICommonMgmtdClient.h"
|
||||
#include "client/storage/StorageClient.h"
|
||||
#include "common/utils/BackgroundRunner.h"
|
||||
#include "common/utils/CPUExecutorGroup.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "meta/base/Config.h"
|
||||
#include "meta/store/Inode.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
using FsStatus = StatFsRsp;
|
||||
|
||||
class FileHelper {
|
||||
public:
|
||||
using RetryOptions = storage::client::RetryOptions;
|
||||
|
||||
FileHelper(const Config &config,
|
||||
std::shared_ptr<client::ICommonMgmtdClient> mgmtdClient,
|
||||
std::shared_ptr<storage::client::StorageClient> storageClient)
|
||||
: config_(config),
|
||||
mgmtdClient_(std::move(mgmtdClient)),
|
||||
storageClient_(std::move(storageClient)) {}
|
||||
|
||||
~FileHelper() { stopAndJoin(); }
|
||||
|
||||
void start(CPUExecutorGroup &exec);
|
||||
void stopAndJoin();
|
||||
|
||||
CoTryTask<uint64_t> queryLength(const UserInfo &userInfo, const Inode &inode, bool *hasHole = nullptr);
|
||||
|
||||
CoTryTask<size_t> remove(const UserInfo &userInfo,
|
||||
const Inode &inode,
|
||||
RetryOptions retry,
|
||||
uint32_t removeChunksBatchSize);
|
||||
|
||||
CoTryTask<FsStatus> statFs(const UserInfo &userInfo, std::chrono::milliseconds cacheDuration);
|
||||
|
||||
std::optional<FsStatus> cachedFsStatus() const { return cachedFsStatus_.rlock()->status_; }
|
||||
|
||||
private:
|
||||
CoTryTask<void> updateStatFs();
|
||||
|
||||
const Config &config_;
|
||||
std::shared_ptr<client::ICommonMgmtdClient> mgmtdClient_;
|
||||
std::shared_ptr<storage::client::StorageClient> storageClient_;
|
||||
|
||||
struct CachedFsStatus {
|
||||
RelativeTime update_;
|
||||
std::optional<FsStatus> status_;
|
||||
};
|
||||
|
||||
std::unique_ptr<BackgroundRunner> bgRunner_;
|
||||
folly::Synchronized<CachedFsStatus> cachedFsStatus_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
172
src/meta/components/Forward.h
Normal file
172
src/meta/components/Forward.h
Normal file
@@ -0,0 +1,172 @@
|
||||
#pragma once
|
||||
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <variant>
|
||||
|
||||
#include "client/mgmtd/ICommonMgmtdClient.h"
|
||||
#include "client/mgmtd/MgmtdClientForServer.h"
|
||||
#include "common/app/NodeId.h"
|
||||
#include "common/net/Client.h"
|
||||
#include "common/net/RequestOptions.h"
|
||||
#include "common/serde/CallContext.h"
|
||||
#include "common/serde/ClientContext.h"
|
||||
#include "common/serde/ClientMockContext.h"
|
||||
#include "common/serde/MessagePacket.h"
|
||||
#include "common/utils/Address.h"
|
||||
#include "common/utils/ConfigBase.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/StatusCode.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "fbs/meta/Utils.h"
|
||||
#include "fmt/core.h"
|
||||
#include "meta/store/Inode.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class Forward {
|
||||
public:
|
||||
struct Config : ConfigBase<Config> {
|
||||
CONFIG_HOT_UPDATED_ITEM(debug, true);
|
||||
CONFIG_HOT_UPDATED_ITEM(addr_type, net::Address::Type::RDMA);
|
||||
CONFIG_HOT_UPDATED_ITEM(timeout, 10_s);
|
||||
};
|
||||
|
||||
using NetClient = std::reference_wrapper<net::Client>;
|
||||
using MockClient = std::reference_wrapper<std::map<flat::NodeId, serde::ClientMockContext>>;
|
||||
|
||||
Forward(const Config &config,
|
||||
flat::NodeId nodeId,
|
||||
std::variant<NetClient, MockClient> client,
|
||||
std::shared_ptr<::hf3fs::client::ICommonMgmtdClient> mgmtdClient)
|
||||
: config_(config),
|
||||
nodeId_(nodeId),
|
||||
client_(client),
|
||||
mgmtdClient_(mgmtdClient) {
|
||||
XLOGF_IF(FATAL, !nodeId_, "invalid nodeId {}", nodeId_);
|
||||
}
|
||||
|
||||
template <typename Req, typename Rsp>
|
||||
CoTryTask<Rsp> forward(flat::NodeId node, Req req) {
|
||||
OperationRecorder::Guard record(OperationRecorder::server(), "forward", req.user.uid);
|
||||
auto result = co_await forwardImpl<Req, Rsp>(node, std::move(req));
|
||||
record.finish(result);
|
||||
co_return result;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Req, typename Rsp, typename Context>
|
||||
struct ForwardMethod {};
|
||||
|
||||
template <typename Context>
|
||||
struct ForwardMethod<SyncReq, SyncRsp, Context> {
|
||||
static constexpr auto rpcMethod = MetaSerde<>::sync<Context>;
|
||||
};
|
||||
|
||||
template <typename Context>
|
||||
struct ForwardMethod<CloseReq, CloseRsp, Context> {
|
||||
static constexpr auto rpcMethod = MetaSerde<>::close<Context>;
|
||||
};
|
||||
|
||||
template <typename Context>
|
||||
struct ForwardMethod<SetAttrReq, SetAttrRsp, Context> {
|
||||
static constexpr auto rpcMethod = MetaSerde<>::setAttr<Context>;
|
||||
};
|
||||
|
||||
template <typename Context>
|
||||
struct ForwardMethod<CreateReq, CreateRsp, Context> {
|
||||
static constexpr auto rpcMethod = MetaSerde<>::create<Context>;
|
||||
};
|
||||
|
||||
template <typename Req>
|
||||
Result<Void> check(flat::NodeId node, Req &req) {
|
||||
if (!node) {
|
||||
XLOGF(WARN, "request {}, unknown corresponding server, need retry", req);
|
||||
return makeError(MetaCode::kForwardFailed, "unknown corresponding server");
|
||||
}
|
||||
if (req.forward) {
|
||||
XLOGF_IF(INFO, config_.debug(), "request is forward from {}, can't forward again, req {}.", req.forward, req);
|
||||
return makeError(MetaCode::kForwardFailed, "double forward, retry");
|
||||
}
|
||||
req.forward = nodeId_;
|
||||
|
||||
XLOGF_IF(INFO, config_.debug(), "forward req {} to {}", req, node);
|
||||
XLOGF_IF(DBG, !config_.debug(), "forward req {} to {}", req, node);
|
||||
XLOGF_IF(FATAL, nodeId_ == node, "forward to self, {} == {}", nodeId_, node);
|
||||
|
||||
return Void{};
|
||||
}
|
||||
|
||||
CoTryTask<net::Address> getAddress(flat::NodeId node) {
|
||||
auto routing = mgmtdClient_->getRoutingInfo();
|
||||
if (!routing) {
|
||||
co_return makeError(MetaCode::kForwardFailed, "routing info not ready, need retry");
|
||||
}
|
||||
|
||||
auto *nodeInfo = routing->raw()->getNode(node);
|
||||
if (!nodeInfo) {
|
||||
auto msg = fmt::format("req forward: routing info doesn't contains node {}", node);
|
||||
XLOG(WARN, msg);
|
||||
co_return makeError(MetaCode::kForwardFailed, std::move(msg));
|
||||
}
|
||||
auto addrs = nodeInfo->extractAddresses("MetaSerde", config_.addr_type());
|
||||
if (addrs.empty()) {
|
||||
auto msg =
|
||||
fmt::format("req forward: node {} doesn't have {} addr.", node, magic_enum::enum_name(config_.addr_type()));
|
||||
XLOG(WARN, msg);
|
||||
co_return makeError(MetaCode::kForwardFailed, std::move(msg));
|
||||
}
|
||||
co_return addrs.front();
|
||||
}
|
||||
|
||||
template <typename Req, typename Rsp>
|
||||
CoTryTask<Rsp> forwardImpl(flat::NodeId node, Req req) {
|
||||
CO_RETURN_ON_ERROR(check(node, req));
|
||||
|
||||
auto opts = net::UserRequestOptions();
|
||||
opts.timeout = config_.timeout();
|
||||
opts.sendRetryTimes = 3;
|
||||
opts.compression = std::nullopt;
|
||||
|
||||
Result<Rsp> result = makeError(MetaCode::kFoundBug);
|
||||
if (std::holds_alternative<NetClient>(client_)) {
|
||||
auto &client = std::get<NetClient>(client_);
|
||||
auto addr = co_await getAddress(node);
|
||||
CO_RETURN_ON_ERROR(addr);
|
||||
auto ctx = client.get().serdeCtx(*addr);
|
||||
result = co_await ForwardMethod<Req, Rsp, serde::ClientContext>::rpcMethod(ctx, req, &opts, nullptr);
|
||||
} else {
|
||||
auto &client = std::get<MockClient>(client_);
|
||||
if (!client.get().contains(node)) {
|
||||
co_return makeError(MetaCode::kForwardFailed, fmt::format("{} not found", node));
|
||||
}
|
||||
auto &ctx = client.get()[node];
|
||||
result = co_await ForwardMethod<Req, Rsp, serde::ClientMockContext>::rpcMethod(ctx, req, &opts, nullptr);
|
||||
}
|
||||
|
||||
if (result.hasError() && StatusCode::typeOf(result.error().code()) == StatusCodeType::RPC) {
|
||||
XLOGF(ERR, "failed to forward req to {}, error {}", node, result.error());
|
||||
co_return makeError(MetaCode::kForwardTimeout,
|
||||
fmt::format("failed to forward req to {}, error {}", node, result.error()));
|
||||
}
|
||||
|
||||
if (result.hasError()) {
|
||||
XLOGF_IF(INFO, config_.debug(), "forward req {} to {}, rsp {}", req, node, result.error());
|
||||
} else {
|
||||
XLOGF_IF(INFO, config_.debug(), "forward req {} to {}, rsp {}", req, node, result.value());
|
||||
}
|
||||
|
||||
co_return result;
|
||||
}
|
||||
|
||||
const Config &config_;
|
||||
flat::NodeId nodeId_;
|
||||
std::variant<NetClient, MockClient> client_;
|
||||
std::shared_ptr<::hf3fs::client::ICommonMgmtdClient> mgmtdClient_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
903
src/meta/components/GcManager.cc
Normal file
903
src/meta/components/GcManager.cc
Normal file
@@ -0,0 +1,903 @@
|
||||
#include "meta/components/GcManager.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/Random.h>
|
||||
#include <folly/ScopeGuard.h>
|
||||
#include <folly/String.h>
|
||||
#include <folly/experimental/coro/BlockingWait.h>
|
||||
#include <folly/experimental/coro/Sleep.h>
|
||||
#include <folly/experimental/coro/ViaIfAsync.h>
|
||||
#include <folly/functional/Partial.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <limits>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/limits.h>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/app/NodeId.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/WithTransaction.h"
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/utils/BackgroundRunner.h"
|
||||
#include "common/utils/CPUExecutorGroup.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/CoroutinesPool.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/MagicEnum.hpp"
|
||||
#include "common/utils/Path.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/SemaphoreGuard.h"
|
||||
#include "common/utils/StatusCode.h"
|
||||
#include "common/utils/SysResource.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/core/user/User.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "fdb/FDBTransaction.h"
|
||||
#include "fmt/format.h"
|
||||
#include "foundationdb/fdb_c_types.h"
|
||||
#include "meta/components/FileHelper.h"
|
||||
#include "meta/event/Event.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/FileSession.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/Utils.h"
|
||||
#include "meta/store/ops/SetAttr.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
static constexpr size_t kNumGcDirectoryPerServer = 5; // 4 + 1
|
||||
|
||||
namespace {
|
||||
monitor::CountRecorder gcSuccCount("meta_server.gc_success");
|
||||
monitor::CountRecorder gcFailCount("meta_server.gc_fail");
|
||||
monitor::CountRecorder gcCritical("meta_server.gc_critical");
|
||||
monitor::CountRecorder gcEnqueue("meta_server.gc_enqueue");
|
||||
monitor::CountRecorder gcBusy("meta_server.gc_busy");
|
||||
monitor::LatencyRecorder gcLatency("meta_server.gc_latency");
|
||||
monitor::DistributionRecorder chunksDist("meta_server.gc_chunks");
|
||||
} // namespace
|
||||
|
||||
/* GcManager::GcDirectory */
|
||||
// Launches one cancellable scan coroutine per GC entry type. Each coroutine is
// scheduled on an executor picked from `exec` and is accounted for in latch_
// before it is started.
void GcManager::GcDirectory::start(GcManager &manager, CPUExecutorGroup &exec) {
  std::vector<GcEntryType> kinds{GcEntryType::DIRECTORY,
                                 GcEntryType::FILE_LARGE,
                                 GcEntryType::FILE_MEDIUM,
                                 GcEntryType::FILE_SMALL};
  // every entry type must be listed above
  assert(kinds.size() == GcEntryType::MAX);
  for (auto kind : kinds) {
    latch_.increase();
    auto scanner = co_withCancellation(cancel_.getToken(), scan(manager, kind));
    std::move(scanner).scheduleOn(&exec.pickNext()).start();
  }
}
|
||||
|
||||
// Requests cancellation of the scan coroutines and blocks until latch_ reports
// that all of them have exited.
void GcManager::GcDirectory::stopAndJoin() {
  cancel_.requestCancellation();
  auto allExited = latch_.wait();
  folly::coro::blockingWait(std::move(allExited));
}
|
||||
|
||||
// Long-running scan loop for one GC entry type.
//
// Each round runs one batch scan (when GC is enabled) and then sleeps for the
// configured scan interval, or for at least 100ms when the batch found nothing.
// The resume position is dropped once more than 5s have passed since the last
// reset, so that scanning restarts from the beginning of the key range.
// The loop ends when the sleep is cancelled; latch_ is counted down on exit.
CoTask<void> GcManager::GcDirectory::scan(GcManager &manager, GcEntryType type) {
  SCOPE_EXIT { latch_.countDown(); };

  std::optional<std::string> cursor;
  auto cursorResetAt = SteadyClock::now();
  for (;;) {
    auto pause = manager.config_.gc().scan_interval();
    if (manager.config_.gc().enable()) {
      const auto now = SteadyClock::now();
      if (now - cursorResetAt > 5_s) {
        cursor = std::nullopt;
        cursorResetAt = now;
      }
      auto delay = type == GcEntryType::DIRECTORY ? manager.config_.gc().gc_directory_delay()
                                                  : manager.config_.gc().gc_file_delay();
      auto outcome = co_await scan(manager,
                                   type,
                                   manager.enableGcDelay() ? delay : 0_ms,
                                   manager.config_.gc().scan_batch(),
                                   cursor);
      if (outcome) {
        if (!*outcome) {
          // nothing found in this round, don't poll faster than every 100ms
          pause = std::max(pause, 100_ms);
        }
      } else {
        XLOGF(ERR, "GcDirectory {} scan {} failed, error {}", name(), magic_enum::enum_name(type), outcome.error());
      }
    }

    auto slept = co_await folly::coro::co_awaitTry(folly::coro::sleep(pause.asUs()));
    if (UNLIKELY(slept.hasException())) {
      // only cancellation is an expected way out of the sleep
      XLOGF_IF(FATAL, !slept.hasException<OperationCancelled>(), "Exception {}", slept.exception().what());
      XLOGF(INFO, "GcDirectory::scan {} {} exit", entry_.name, magic_enum::enum_name(type));
      break;
    }
  }
}
|
||||
|
||||
// Runs one batch scan for `type`: loads GC entries whose key lies between the
// type prefix (or `prev`, when it is further ahead) and the key built from
// (now - delay), and enqueues a GcTask for every entry that is not queued yet.
//
// At most `limit` tasks are kept queued; when the limit is already reached the
// scan stops and reports true. `prev` is updated to the name of the last entry
// loaded. Returns true when at least one entry was loaded, false when none was,
// or the error of the failed load.
// NOTE(review): the lock on state.queued is held across co_await points; confirm
// that the lock type tolerates this.
CoTryTask<bool> GcManager::GcDirectory::scan(GcManager &manager,
                                             GcEntryType type,
                                             Duration delay,
                                             size_t limit,
                                             std::optional<std::string> &prev) {
  auto &state = states_[type];
  XLOGF(DBG, "GcDirectory {} scan {}, queued {}", name(), magic_enum::enum_name(type), state.counter.load());

  auto prefix = prefixOf(type);
  std::string beginkey = fmt::format("{}", prefix);
  auto endtime = UtcTime::fromMicroseconds(UtcClock::now().toMicroseconds() - delay.asUs().count());
  std::string endkey = formatGcEntry(prefix, endtime, InodeId::root());
  XLOGF_IF(FATAL, beginkey >= endkey, "{} >= {}", beginkey, endkey);

  // skip some keys
  if (prev.has_value() && *prev > beginkey) {
    beginkey = std::min(*prev, endkey);
  }

  auto queued = state.queued.lock();
  bool loadedAny = false;
  while (beginkey < endkey) {
    auto cnt = state.counter.load();
    if (cnt >= limit) {
      XLOGF(DBG, "GcDirectory skip scan, queuedCnt {}", cnt);
      co_return true;
    }

    // forget the inodes whose tasks completed since the previous round
    auto finished = state.finished.withLock([](auto &v) { return std::exchange(v, {}); });
    for (auto &inode : finished) {
      queued->erase(inode);
    }

    auto txn = manager.kvEngine_->createReadonlyTransaction();
    auto result = co_await DirEntryList::snapshotLoad(*txn, dirId(), beginkey, endkey, limit - cnt);
    CO_RETURN_ON_ERROR(result);
    for (auto &entry : result->entries) {
      if (queued->contains(entry.id)) {
        continue;
      }
      queued->insert(entry.id);
      state.counter++;
      manager.gcWorkers_->enqueue(GcTask(shared_from_this(), type, entry), priorityOf(type));
    }

    if (!result->entries.empty()) {
      const auto &last = result->entries.back().name;
      beginkey = last;
      prev = last;
      loadedAny = true;
    }
    if (!result->more) {
      break;
    }
  }

  co_return loadedAny;
}
|
||||
|
||||
// Marks `task` as completed: its inode id is added to the per-type `finished`
// set and the per-type counter of queued tasks is decremented.
// Aborts (FATAL) on an invalid entry type or when the counter would underflow.
void GcManager::GcDirectory::finish(const GcManager::GcTask &task) {
  XLOGF_IF(FATAL, task.type >= GcEntryType::MAX, "Invalid GcEntryType {}", (int)task.type);
  auto &state = states_[task.type];
  state.finished.lock()->insert(task.taskEntry.id);
  // Post-decrement yields the value *before* the decrement, so a previous value
  // of 0 already means the counter has underflowed. The old `< 0` test could
  // only notice that one call later, if at all.
  auto cnt = state.counter--;
  XLOGF_IF(FATAL, cnt <= 0, "cnt {}", cnt);
}
|
||||
|
||||
// Creates a GC entry for `inode` inside transaction `txn` and bumps the enqueue
// counter. Files go through addFile(), directories through addDirectory();
// any other inode type aborts (FATAL).
CoTryTask<void> GcManager::GcDirectory::add(auto &txn, const Inode &inode, const GcConfig &config, GcInfo gcInfo) {
  gcEnqueue.addSample(1);
  const auto kind = inode.getType();
  if (kind == InodeType::File) {
    co_return co_await addFile(txn, inode, config);
  }
  if (kind == InodeType::Directory) {
    // only directory need gcInfo
    co_return co_await addDirectory(txn, inode, gcInfo);
  }
  XLOGF(FATAL, "Invalid inode type {}, inode {}", magic_enum::enum_name(inode.getType()), inode);
}
|
||||
|
||||
// Stores a GC entry for the file `inode`. The entry is classified by the file's
// chunk count (length / chunkSize): below small_file_chunks() it is a small
// file, otherwise at or above large_file_chunks() a large one, else medium.
CoTryTask<void> GcManager::GcDirectory::addFile(auto &txn, const Inode &inode, const GcConfig &config) {
  auto chunks = inode.asFile().length / inode.asFile().layout.chunkSize;
  const bool large = chunks >= config.large_file_chunks();
  const bool small = chunks < config.small_file_chunks();
  // "small" takes precedence when both thresholds match
  const auto kind = small ? GcEntryType::FILE_SMALL : (large ? GcEntryType::FILE_LARGE : GcEntryType::FILE_MEDIUM);
  auto prefix = prefixOf(kind);

  auto entry = DirEntry::newFile(dirId(), formatGcEntry(prefix, UtcClock::now(), inode.id), inode.id);
  CO_RETURN_ON_ERROR(co_await entry.store(txn));
  XLOGF(DBG, "GcManager create GC entry {}", entry);
  co_return Void{};
}
|
||||
|
||||
// Stores a GC entry for the directory `inode`, carrying the directory's acl and
// the supplied `gcInfo`.
CoTryTask<void> GcManager::GcDirectory::addDirectory(auto &txn, const Inode &inode, GcInfo gcInfo) {
  auto entryName = formatGcEntry(prefixOf(GcEntryType::DIRECTORY), UtcClock::now(), inode.id);
  auto entry = DirEntry::newDirectory(dirId(), entryName, inode.id, inode.acl);
  entry.gcInfo = gcInfo;
  CO_RETURN_ON_ERROR(co_await entry.store(txn));
  XLOGF(DBG, "GcManager create GC entry {}", entry);
  co_return Void{};
}
|
||||
|
||||
// Re-keys the GC entry of `task` with a timestamp of now + `delay`: a copy of
// the entry is stored under the new name and the old entry is removed, all in
// `txn`. Does nothing when the task's entry no longer exists.
CoTryTask<void> GcManager::GcDirectory::moveToTail(auto &txn, const GcTask &task, Duration delay) {
  // check task entry still exists
  auto exists = co_await DirEntry::checkExist(txn, task.taskEntry.parent, task.taskEntry.name);
  CO_RETURN_ON_ERROR(exists);
  if (!*exists) {
    co_return Void{};
  }

  auto moved = task.taskEntry;
  moved.name = formatGcEntry(prefixOf(task.type), UtcClock::now() + delay.asUs(), task.taskEntry.id);
  XLOGF_IF(FATAL, moved.isSymlink(), "entry is symlink {}", moved);
  XLOGF_IF(FATAL, !moved.valid(), "Invalid entry {} {}", moved, moved.valid().error());
  CO_RETURN_ON_ERROR(co_await moved.store(txn));
  CO_RETURN_ON_ERROR(co_await task.taskEntry.remove(txn));
  co_return Void{};
}
|
||||
|
||||
/* GcManager::GcTask */
|
||||
// Looks up the attributes of user `uid` in the manager's user store.
// When the lookup fails with StatusCode::kAuthenticationFail, a synthetic user
// is returned instead: same uid, gid equal to the uid, no groups and the name
// "user-<uid>". Every other result is passed through unchanged.
CoTryTask<flat::UserAttr> GcManager::GcTask::getUserAttr(GcManager &manager, flat::Uid uid) {
  auto lookup = co_await manager.userStore_->getUser(uid);
  const bool unknownUser = lookup.hasError() && lookup.error().code() == StatusCode::kAuthenticationFail;
  if (!unknownUser) {
    co_return lookup;
  }

  // user not found, maybe running in unittest, or user has been removed
  auto fallback = flat::UserAttr();
  fallback.uid = uid;
  fallback.gid = flat::Gid(uid.toUnderType());
  fallback.groups = {};
  fallback.name = fmt::format("user-{}", uid.toUnderType());
  co_return fallback;
}
|
||||
|
||||
// Executes this GC task and post-processes a failure.
//
// - Critical errors (any Meta code other than kBusy, or the storage codes
//   kInvalidArg / kChecksumMismatch / kFoundBug) are logged and counted.
// - For critical errors and for kBusy the GC entry is moved to the tail with
//   the configured retry delay; the result of that move is not inspected.
// - On kReadOnlyServer the task sleeps one second before returning.
// The error of the GC operation itself is returned to the caller.
CoTryTask<void> GcManager::GcTask::run(GcManager &manager) {
  XLOGF(DBG, "GcTask {} run", taskEntry);

  Result<Void> outcome = makeError(MetaCode::kFoundBug);
  if (taskEntry.type == InodeType::File) {
    outcome = co_await gcFile(manager);
  } else if (taskEntry.type == InodeType::Directory) {
    outcome = co_await gcDirectory(manager);
  } else {
    // symlinks and unknown types must never show up as GC tasks
    XLOGF(FATAL, "Invalid type {}, {}", magic_enum::enum_name(taskEntry.type), taskEntry);
  }

  if (!outcome.hasError()) {
    co_return Void{};
  }

  auto code = outcome.error().code();
  const bool busy = code == MetaCode::kBusy;
  const bool metaFailure = StatusCode::typeOf(code) == StatusCodeType::Meta && !busy;  // Meta error code
  const bool storageFailure = code == StorageClientCode::kInvalidArg ||
                              code == StorageClientCode::kChecksumMismatch ||
                              code == StorageClientCode::kFoundBug;  // storage error code
  const bool critical = metaFailure || storageFailure;

  if (critical) {
    XLOGF(CRITICAL, "GcTask {} run failed, error {}", taskEntry.id, outcome.error());
    gcCritical.addSample(1);
  }
  if (critical || busy) {
    co_await manager.runReadWrite([&](IReadWriteTransaction &txn) -> CoTryTask<void> {
      co_return co_await gcDir->moveToTail(txn, *this, manager.config_.gc().retry_delay());
    });
  }
  if (code == StorageClientCode::kReadOnlyServer) {
    XLOGF(ERR, "GcTask {} run failed, readonly server, {}", taskEntry, outcome.error());
    co_await folly::coro::sleep(std::chrono::seconds(1));
  }
  co_return outcome;
}
|
||||
|
||||
// Garbage-collects the directory referenced by this task.
//
// Runs read-write transactions until the directory is gone. Each transaction
// removes up to gc_directory_entry_batch() entries; once no entries remain, the
// GC entry and the inode are removed as well. The first transaction that gets
// that far also sanity-checks the inode (nlink == 0, is a directory, not
// immutable) and emits the GC event. Concurrency is bounded by the manager's
// directory semaphore.
CoTryTask<void> GcManager::GcTask::gcDirectory(GcManager &manager) {
  SemaphoreGuard guard(manager.concurrentGcDirSemaphore_);
  co_await guard.coWait();

  XLOGF_IF(DFATAL, !taskEntry.isDirectory(), "{} is not directory", taskEntry);

  // old version did not record who performed the recursive remove
  // just assume that the directory owner performed this operation.
  auto uid = taskEntry.gcInfo ? taskEntry.gcInfo->user : taskEntry.dirAcl->uid;
  auto user = co_await getUserAttr(manager, uid);
  CO_RETURN_ON_ERROR(user);

  bool done = false;      // set once the GC entry and inode have been removed
  bool verified = false;  // set once the sanity check has passed and the event is logged
  auto step = [&](IReadWriteTransaction &txn) -> CoTryTask<void> {
    if (auto fdbTxn = dynamic_cast<kv::FDBTransaction *>(&txn);
        fdbTxn && manager.config_.gc().txn_low_priority()) {
      fdbTxn->setOption(FDBTransactionOption::FDB_TR_OPTION_PRIORITY_BATCH, {});
    }

    if (!verified) {
      auto loadInode = co_await Inode::load(txn, taskEntry.id);
      CO_RETURN_ON_ERROR(loadInode);
      if (!loadInode->has_value()) {
        // inode is already removed, may happens when retry transaction
        XLOGF(ERR, "taskEntry {}, inode already removed", taskEntry);
        CO_RETURN_ON_ERROR(co_await removeGcEntryAndInode(manager, txn));
        done = true;
        co_return Void{};
      }

      // sanity check
      auto &inode = **loadInode;
      const bool sane = !inode.nlink && inode.isDirectory() && !(inode.acl.iflags & FS_IMMUTABLE_FL);
      if (!sane) {
        XLOGF(DFATAL,
              "taskEntry {}, inode {}, nlink {}, directory {}, immutable {}",
              taskEntry,
              inode,
              inode.nlink,
              inode.isDirectory(),
              inode.acl.iflags & FS_IMMUTABLE_FL);
        co_return makeError(MetaCode::kFoundBug);
      }

      Event(Event::Type::GC)
          .addField("inode", inode.id)
          .addField("owner", inode.acl.uid)
          .addField("parent", inode.asDirectory().parent)
          .addField("name", inode.asDirectory().name)
          .log();
      manager.getEventTraceLog().newEntry(MetaEventTrace{.eventType = Event::Type::GC,
                                                         .inodeId = inode.id,
                                                         .parentId = inode.asDirectory().parent,
                                                         .entryName = inode.asDirectory().name,
                                                         .ownerId = inode.acl.uid});
      verified = true;
    }

    auto batch = co_await DirEntryList::load(txn, taskEntry.id, "", manager.config_.gc().gc_directory_entry_batch());
    CO_RETURN_ON_ERROR(batch);

    for (auto &child : batch->entries) {
      CO_RETURN_ON_ERROR(co_await removeEntry(manager, txn, child, *user));
    }

    if (!batch->more) {
      // directory is empty now, drop the GC entry and the inode itself
      CO_RETURN_ON_ERROR(co_await removeGcEntryAndInode(manager, txn));
      done = true;
    }
    co_return Void{};
  };

  do {
    CO_RETURN_ON_ERROR(co_await manager.runReadWrite(step));
  } while (!done);

  co_return Void{};
}
|
||||
|
||||
/// Garbage-collects the file referenced by this task.
///
/// Steps, in order:
///  1. wait for a slot on the manager's file-GC semaphore;
///  2. in a read-only transaction, snapshot-load the inode and, if
///     `check_session` is enabled, give up with kBusy while a file session
///     still exists;
///  3. remove the file's chunks through the manager's FileHelper;
///  4. in a read-write transaction, drop the GC entry together with the inode;
///  5. emit the GC event / trace record, only after step 4 succeeded.
///
/// @param manager  owning GcManager (configuration, helpers, transactions).
/// @return Void on success, otherwise the first error encountered.
CoTryTask<void> GcManager::GcTask::gcFile(GcManager &manager) {
  SemaphoreGuard guard(manager.concurrentGcFileSemaphore_);
  co_await guard.coWait();
  assert(taskEntry.isFile());
  XLOGF(DBG, "Gc file {}", taskEntry.id);

  auto loaded = co_await manager.runReadOnly([&](auto &txn) -> CoTryTask<std::optional<Inode>> {
    // Optionally run the transaction at FDB batch priority.
    if (auto *fdb = dynamic_cast<kv::FDBTransaction *>(&txn); fdb && manager.config_.gc().txn_low_priority()) {
      fdb->setOption(FDBTransactionOption::FDB_TR_OPTION_PRIORITY_BATCH, {});
    }

    // Without session checking only the inode is needed (and no sanity check is applied).
    if (!manager.config_.gc().check_session()) {
      co_return co_await Inode::snapshotLoad(txn, taskEntry.id);
    }

    // Load the inode and probe for a live session concurrently.
    auto [inodeRes, sessionRes] =
        co_await folly::coro::collectAll(Inode::snapshotLoad(txn, taskEntry.id),
                                         FileSession::snapshotCheckExists(txn, taskEntry.id));
    CO_RETURN_ON_ERROR(inodeRes);
    CO_RETURN_ON_ERROR(sessionRes);
    if (*sessionRes) {
      XLOGF(CRITICAL, "Delay gc file {}, still has session {}.", taskEntry.id, sessionRes.value()->clientId);
      gcBusy.addSample(1);
      co_return makeError(MetaCode::kBusy, "still have session");
    }

    // Sanity check: a file queued for GC must be unlinked and must not be immutable.
    if (inodeRes->has_value()) {
      const auto &found = inodeRes->value();
      if (found.nlink || found.acl.iflags & FS_IMMUTABLE_FL) {
        XLOGF(DFATAL,
              "taskEntry {}, inode {}, nlink {}, immutable {}",
              taskEntry,
              found,
              found.nlink,
              found.acl.iflags & FS_IMMUTABLE_FL);
        co_return makeError(MetaCode::kFoundBug);
      }
    }
    co_return inodeRes;
  });
  CO_RETURN_ON_ERROR(loaded);

  auto &target = *loaded;
  // Filled only when the inode exists; logged after the metadata removal succeeds.
  std::optional<Event> pendingEvent;
  std::optional<MetaEventTrace> pendingTrace;
  if (LIKELY(target.has_value())) {
    auto removed = co_await manager.fileHelper_->remove(UserInfo(Uid(0), Gid(0)),
                                                        *target,
                                                        manager.config_.gc().retry_remove_chunks(),
                                                        manager.config_.gc().remove_chunks_batch_size());
    if (removed.hasError()) {
      XLOGF(ERR, "GcManager failed to remove chunks for {}, error {}", target->id, removed.error());
      CO_RETURN_ON_ERROR(removed);
    }
    auto removedChunks = *removed;
    XLOGF(DBG, "GcManager removed {} chunks for {}", removedChunks, target->id);
    chunksDist.addSample(removedChunks);

    pendingEvent = Event(Event::Type::GC)
                       .addField("inode", target->id)
                       .addField("owner", target->acl.uid)
                       .addField("length", target->asFile().length)
                       .addField("chunks", removedChunks);
    pendingTrace = MetaEventTrace{
        .eventType = Event::Type::GC,
        .inodeId = target->id,
        .ownerId = target->acl.uid,
        .length = target->asFile().length,
        .removedChunks = removedChunks,
    };
  } else {
    XLOGF(CRITICAL, "Inode of {} not found, shouldn't happen!!", taskEntry.id);
  }

  // Drop the GC entry and the inode, even when the inode was not found above.
  auto cleanup =
      co_await manager.runReadWrite([&](IReadWriteTransaction &txn) { return removeGcEntryAndInode(manager, txn); });
  if (cleanup.hasError()) {
    XLOGF(ERR, "GcManager failed to remove GC entry and Inode for {}, error {}", taskEntry.id, cleanup.error());
    CO_RETURN_ON_ERROR(cleanup);
  }

  if (pendingEvent) {
    pendingEvent->log();
  }
  if (pendingTrace) {
    manager.getEventTraceLog().append(*pendingTrace);
  }

  co_return Void{};
}
|
||||
|
||||
/// Removes one child entry of the directory being garbage-collected.
///
/// The child is either unlinked through GcManager::removeEntry(), or, when the
/// user who requested the recursive remove is not allowed to delete it, moved
/// into an orphan directory created by createOrphanEntry().
///
/// @param manager  owning GcManager.
/// @param txn      read-write transaction all changes are applied to.
/// @param entry    child entry; its parent must be the task's directory.
/// @param user     attributes of the user on whose behalf the remove runs.
/// @return Void on success; kFoundBug when entry and inode are inconsistent;
///         otherwise the first error returned by a store operation.
CoTryTask<void> GcManager::GcTask::removeEntry(GcManager &manager,
                                               IReadWriteTransaction &txn,
                                               const DirEntry &entry,
                                               const flat::UserAttr &user) {
  if (entry.parent != taskEntry.id) {
    XLOGF(DFATAL, "{}.parent != {}.id", entry, taskEntry);
    co_return makeError(MetaCode::kFoundBug);
  }
  auto inode = co_await entry.loadInode(txn);
  CO_RETURN_ON_ERROR(inode);
  if (inode->id != entry.id) {
    // Log the inode itself (not the Result wrapper), like the check below.
    XLOGF(DFATAL, "{}.id != {}.id", *inode, entry);
    co_return makeError(MetaCode::kFoundBug);
  }
  if (inode->isDirectory() && inode->asDirectory().parent != entry.parent) {
    XLOGF(DFATAL, "entry {}, inode {}, different parent", entry, *inode);
    co_return makeError(MetaCode::kFoundBug);
  }

  // Decide whether the entry may be removed on behalf of `user`.
  bool perm = true;
  if (inode->acl.iflags & FS_IMMUTABLE_FL) {
    // Immutable inodes are never removed by GC.
    perm = false;
  } else if (manager.config_.gc().recursive_perm_check() && inode->isDirectory()) {
    auto check = inode->acl.checkRecursiveRmPerm(flat::UserInfo(user.uid, user.gid, user.groups), false);
    if (check.hasError()) {
      // allow remove empty directory
      auto empty = co_await DirEntryList::checkEmpty(txn, inode->id);
      CO_RETURN_ON_ERROR(empty);
      perm = *empty;
    }
  }

  if (!perm) {
    // no permission to remove, move into orphan directory
    auto orphanEntry = co_await createOrphanEntry(manager, txn, entry, *inode, user);
    CO_RETURN_ON_ERROR(orphanEntry);
    XLOGF(CRITICAL, "no permission to perform recursive remove {}, move to {}", entry, *orphanEntry);
    if (inode->isDirectory()) {
      // Keep the directory inode's back-reference in sync with its new entry.
      // createOrphanEntry() may have chosen a different name to avoid a
      // collision, so both parent and name are updated.
      inode->asDirectory().parent = orphanEntry->parent;
      inode->asDirectory().name = orphanEntry->name;
      CO_RETURN_ON_ERROR(co_await inode->store(txn));
    }
    CO_RETURN_ON_ERROR(co_await entry.remove(txn));
    co_return Void{};
  }

  // can remove this entry
  auto gcInfo = GcInfo();
  // Tasks without gcInfo: fall back to the directory owner and a path relative to the task directory.
  gcInfo.user = taskEntry.gcInfo ? taskEntry.gcInfo->user : taskEntry.dirAcl->uid;
  gcInfo.origPath = taskEntry.gcInfo ? taskEntry.gcInfo->origPath / entry.name : Path(entry.name);
  CO_RETURN_ON_ERROR(co_await manager.removeEntry(txn, entry, *inode, gcInfo));

  co_return Void{};
}
|
||||
|
||||
/// Creates a new directory entry for `inode` under the GC orphan tree and
/// returns it. The original `entry` is not removed here.
///
/// The orphan directory is `trash/gc-orphans/<user>-<YYYYMMDD>` below the root,
/// with an extra `<entry.parent>` component for files. Missing path components
/// are created as root-owned 0755 directories. When the task carries gcInfo, a
/// `_hf3fs_original_path` symlink pointing at the original path is added too.
///
/// @param manager  owning GcManager (used for inode id allocation).
/// @param txn      read-write transaction all changes are applied to.
/// @param entry    entry being moved; must reference `inode`.
/// @param inode    inode the entry points to.
/// @param user     user whose name is used in the orphan directory name.
/// @return the stored orphan entry, or the first error encountered.
CoTryTask<DirEntry> GcManager::GcTask::createOrphanEntry(GcManager &manager,
                                                         IReadWriteTransaction &txn,
                                                         const DirEntry &entry,
                                                         const Inode &inode,
                                                         const flat::UserAttr &user) {
  XLOGF_IF(FATAL, entry.id != inode.id, "entry {}, inode {}", entry, inode);
  auto orphanDir = Path(fmt::format("trash/gc-orphans/{}-{:%Y%m%d}", user.name, UtcClock::now()));
  if (inode.isFile()) {
    orphanDir = orphanDir / fmt::format("{}", entry.parent);
  }
  auto orphanName = entry.name;

  // Allocates a fresh inode id and aborts the process if it is already in use.
  auto newInodeId = [&]() -> CoTryTask<InodeId> {
    auto allocated = co_await manager.idAlloc_->allocate();
    CO_RETURN_ON_ERROR(allocated);
    auto existing = co_await Inode::load(txn, *allocated);
    CO_RETURN_ON_ERROR(existing);
    if (existing->has_value()) {
      auto &duplicate = **existing;
      XLOGF(FATAL, "Found duplicated InodeId {}, {}", *allocated, duplicate);
    }
    co_return *allocated;
  };

  // create orphan directory
  auto parent = InodeId::root();
  for (const auto &component : orphanDir) {
    assert(!component.empty());
    const auto &fname = component.string();
    // Try `fname`, then `fname.1`, `fname.2`, ... until a directory is found or created.
    for (size_t attempt = 0;; ++attempt) {
      auto dirName = attempt == 0 ? fname : fmt::format("{}.{}", fname.substr(0, 240), attempt);
      auto loaded = co_await DirEntry::load(txn, parent, dirName);
      CO_RETURN_ON_ERROR(loaded);
      if (loaded->has_value()) {
        if (loaded.value()->isDirectory()) {
          parent = loaded.value()->id;
          break;
        }
        XLOGF(WARN, "entry {} exists, but not directory", **loaded);
        continue;
      }

      // Nothing with this name yet: create the directory (entry first, then inode).
      auto dirId = co_await newInodeId();
      CO_RETURN_ON_ERROR(dirId);
      auto acl = Acl(flat::Uid(0), flat::Gid(0), flat::Permission(0755));
      auto newEntry = DirEntry::newDirectory(parent, dirName, *dirId, acl);
      CO_RETURN_ON_ERROR(co_await newEntry.store(txn));
      auto newInode = Inode::newDirectory(*dirId, parent, dirName, acl, Layout(), UtcClock::now().castGranularity(1_s));
      CO_RETURN_ON_ERROR(co_await newInode.store(txn));
      parent = *dirId;
      break;
    }
  }

  if (taskEntry.gcInfo) {
    // create a symlink under directory, give original path
    const bool movingDirectory = inode.isDirectory();
    auto symlinkParent = movingDirectory ? inode.id : parent;
    auto origPath = movingDirectory ? taskEntry.gcInfo->origPath / entry.name : taskEntry.gcInfo->origPath;
    for (size_t attempt = 0;; ++attempt) {
      auto symlinkName = attempt == 0
                             ? "_hf3fs_original_path"
                             : fmt::format("_hf3fs_original_path.{}", UtcClock::now().toMicroseconds());
      auto loaded = co_await DirEntry::load(txn, symlinkParent, symlinkName);
      CO_RETURN_ON_ERROR(loaded);
      if (loaded->has_value()) {
        // Reuse an existing symlink that already points at the same path; otherwise retry with another name.
        if (loaded->value().isSymlink()) {
          auto linkInode = co_await loaded->value().loadInode(txn);
          CO_RETURN_ON_ERROR(linkInode);
          if (linkInode->asSymlink().target == origPath) {
            break;
          }
        }
        continue;
      }

      auto linkId = co_await newInodeId();
      CO_RETURN_ON_ERROR(linkId);
      auto symlinkInode = Inode::newSymlink(*linkId,
                                            origPath,
                                            taskEntry.gcInfo->user,
                                            flat::Gid(taskEntry.gcInfo->user.toUnderType()),
                                            UtcClock::now().castGranularity(1_s));
      auto symlinkEntry = DirEntry::newSymlink(symlinkParent, symlinkName, *linkId);
      CO_RETURN_ON_ERROR(co_await symlinkInode.store(txn));
      CO_RETURN_ON_ERROR(co_await symlinkEntry.store(txn));
      break;
    }
  }

  // Store the entry under the first free name: the original one, else a timestamp-suffixed variant.
  for (size_t attempt = 0;; ++attempt) {
    auto candidate =
        attempt == 0 ? orphanName : fmt::format("{}.{}", orphanName.substr(0, 230), UtcClock::now().toMicroseconds());
    auto taken = co_await DirEntry::checkExist(txn, parent, candidate);
    CO_RETURN_ON_ERROR(taken);
    if (*taken) {
      continue;
    }
    auto orphanEntry = entry;
    orphanEntry.parent = parent;
    orphanEntry.name = candidate;
    CO_RETURN_ON_ERROR(co_await orphanEntry.store(txn));
    co_return orphanEntry;
  }
}
|
||||
|
||||
/// Deletes the GC task entry and then the inode it refers to, both within `txn`.
///
/// @param manager  unused; kept for signature symmetry with the other task helpers.
/// @param txn      read-write transaction the removals are applied to.
/// @return Void on success, otherwise the first removal error.
CoTryTask<void> GcManager::GcTask::removeGcEntryAndInode(GcManager &manager, IReadWriteTransaction &txn) {
  auto entryRemoved = co_await taskEntry.remove(txn, true);
  CO_RETURN_ON_ERROR(entryRemoved);

  auto inodeRemoved = co_await Inode(taskEntry.id).remove(txn);
  CO_RETURN_ON_ERROR(inodeRemoved);

  co_return Void{};
}
|
||||
|
||||
/// Verifies the file system roots and opens this node's GC directories.
///
/// Directory 0 is only opened if it already exists; directories with a higher
/// index are created on demand. The process aborts if none could be opened.
///
/// @return Void on success, otherwise the error from checkFs()/openGcDirectory().
CoTryTask<void> GcManager::init() {
  XLOGF(INFO, "GcManager::init");

  auto fsCheck = co_await checkFs();
  if (fsCheck.hasError()) {
    XLOGF(ERR, "GcManager::checkFs failed, error {}", fsCheck.error());
    CO_RETURN_ERROR(fsCheck);
  }

  // each meta server may have up to kNumGcDirectoryPerServer GC directories
  for (size_t idx = 0; idx < kNumGcDirectoryPerServer; idx++) {
    const bool create = idx != 0;
    auto opened = co_await openGcDirectory(idx, create);
    if (opened.hasError()) {
      XLOGF(ERR, "GcManager::openGcDirectory({}, {}) failed, error {}", idx, create, opened.error());
      CO_RETURN_ERROR(opened);
    }
    // A null pointer means "does not exist and was not created".
    if (*opened) {
      currGcDirectories_.push_back(*opened);
    }
  }
  XLOGF_IF(FATAL, currGcDirectories_.empty(), "currGcDirectories_.empty()");

  XLOGF(INFO, "GcManager::init success.");
  co_return Void{};
}
|
||||
|
||||
/// Starts the GC worker pool, the periodic GC-directory rescan and every local
/// GC directory, in that order. init() must have populated the directories.
///
/// @param exec  executor group the workers and background jobs run on.
void GcManager::start(CPUExecutorGroup &exec) {
  XLOGF(DBG, "GcManager start.");
  XLOGF_IF(FATAL, currGcDirectories_.empty(), "GcDirectory not set!!!");

  // start GC workers
  gcWorkers_ = std::make_unique<PriorityCoroutinePool<GcTask>>(config_.gc().workers());
  gcWorkers_->start(folly::partial(&GcManager::runGcTask, this), exec);

  // start GC scanner: refresh the list of GC directories every 30 seconds while GC is enabled
  gcRunner_ = std::make_unique<BackgroundRunner>(&exec.pickNext());
  gcRunner_->start(
      "ScanAllGcDirs",
      [this]() -> CoTask<void> {
        if (!config_.gc().enable()) {
          co_return;
        }
        auto scanned = co_await this->scanAllGcDirectories();
        if (scanned.hasError()) {
          XLOGF(ERR, "GcManager failed to scan all available GC directories, error {}", scanned.error());
        }
      },
      []() { return 30_s; });

  for (const auto &dir : currGcDirectories_) {
    dir->start(*this, exec);
  }

  XLOGF(INFO, "GcManager started!");
}
|
||||
|
||||
void GcManager::stopAndJoin() {
|
||||
XLOGF(INFO, "GcManager stop.");
|
||||
for (const auto &gcDir : currGcDirectories_) {
|
||||
gcDir->stopAndJoin();
|
||||
}
|
||||
if (gcRunner_) {
|
||||
folly::coro::blockingWait(gcRunner_->stopAll());
|
||||
gcRunner_.reset();
|
||||
}
|
||||
if (gcWorkers_) {
|
||||
gcWorkers_->stopAndJoin();
|
||||
gcWorkers_.reset();
|
||||
}
|
||||
XLOGF(INFO, "GcManager stopped!");
|
||||
}
|
||||
|
||||
/// Checks, in one read-only transaction, that both the root inode and the GC
/// root inode exist.
///
/// @return Void when both exist; kBadFileSystem when either is missing; the
///         load error when a snapshot load fails.
CoTryTask<void> GcManager::checkFs() {
  co_return co_await runReadOnly([&](auto &txn) -> CoTryTask<void> {
    // check tree roots exist
    auto present = [](auto &loaded) { return loaded.has_value(); };
    auto rootFound = (co_await Inode::snapshotLoad(txn, InodeId::root())).then(present);
    auto gcRootFound = (co_await Inode::snapshotLoad(txn, InodeId::gcRoot())).then(present);
    CO_RETURN_ON_ERROR(rootFound);
    CO_RETURN_ON_ERROR(gcRootFound);

    if (*rootFound && *gcRootFound) {
      co_return Void{};
    }
    XLOGF(CRITICAL, "Root or GcRoot not found, root {}, gcRoot {}", rootFound, gcRootFound);
    co_return makeError(MetaCode::kBadFileSystem);
  });
}
|
||||
|
||||
/// Opens, and optionally creates, the GC directory number `idx` of this node
/// under the GC root.
///
/// @param idx     index of the directory; combined with the node id to form its name.
/// @param create  create the directory when it does not exist yet.
/// @return the directory handle; a null pointer when it is missing and `create`
///         is false; or the first error from the transaction.
CoTryTask<std::shared_ptr<GcManager::GcDirectory>> GcManager::openGcDirectory(size_t idx, bool create) {
  // generate GC directory name based on nodeId
  auto dirName = GcDirectory::nameOf(nodeId_, idx);
  XLOGF(INFO, "Open GC directory {}/{}", InodeId::gcRoot(), dirName);

  co_return co_await runReadWrite([&](IReadWriteTransaction &txn) -> CoTryTask<std::shared_ptr<GcDirectory>> {
    auto existing = co_await DirEntry::load(txn, InodeId::gcRoot(), dirName);
    CO_RETURN_ON_ERROR(existing);
    if (existing->has_value()) {
      XLOGF(INFO, "GC directory {}/{} -> {} exists", InodeId::gcRoot(), dirName, **existing);
      co_return std::make_shared<GcDirectory>(**existing);
    }
    if (!create) {
      co_return std::shared_ptr<GcDirectory>();
    }

    // choose InodeId randomly: allocate a batch of ids, shuffle it and take the first one
    std::vector<InodeId> candidates;
    while (candidates.size() < 512) {
      auto allocated = co_await idAlloc_->allocate();
      CO_RETURN_ON_ERROR(allocated);
      candidates.push_back(*allocated);
    }
    std::shuffle(candidates.begin(), candidates.end(), std::mt19937(std::random_device()()));
    auto chosenId = candidates.front();

    auto dirInode = Inode::newDirectory(chosenId,
                                        InodeId::gcRoot(),
                                        dirName,
                                        Acl::gcRoot(),
                                        Layout() /* Invalid layout */,
                                        UtcClock::now());
    auto dirEntry = DirEntry::newDirectory(InodeId::gcRoot(), dirName, chosenId, Acl::gcRoot());
    XLOGF(INFO,
          "GC directory {}/{} not found, create it: id {}, entry {}.",
          InodeId::gcRoot(),
          dirName,
          chosenId,
          dirEntry);

    // Persist the inode before the entry that references it.
    CO_RETURN_ON_ERROR(co_await dirInode.store(txn));
    CO_RETURN_ON_ERROR(co_await dirEntry.store(txn));

    co_return std::make_shared<GcDirectory>(dirEntry);
  });
}
|
||||
|
||||
/// Unlinks `entry` from its inode inside `txn`.
///
/// The inode's link count is decremented and its ctime refreshed. Symlink
/// inodes are deleted immediately once unreferenced. Files and directories
/// whose link count reaches zero are queued in a GC directory instead.
///
/// @param txn     read-write transaction all changes are applied to.
/// @param entry   directory entry to remove.
/// @param inode   inode the entry points to; modified in place.
/// @param gcInfo  information recorded with the GC entry of a removed directory or file.
/// @return Void on success; kInconsistent if nlink is already 0; kNoPermission
///         for immutable inodes; kFoundBug on directory invariants violations;
///         otherwise the first store error.
CoTryTask<void> GcManager::removeEntry(IReadWriteTransaction &txn, const DirEntry &entry, Inode &inode, GcInfo gcInfo) {
  XLOGF(DBG, "GcManager remove entry {}", entry);

  if (inode.nlink == 0) {
    auto msg = fmt::format("DirEntry {} exists, but inode {}'s nlink is 0, shouldn't happen!!!", entry, inode);
    XLOG(DFATAL, msg);
    co_return makeError(MetaCode::kInconsistent, msg);
  }
  if (inode.acl.iflags & FS_IMMUTABLE_FL) {
    auto msg = fmt::format("can't remove inode {} with FS_IMMUTABLE_FL", inode.id);
    XLOG(CRITICAL, msg);
    co_return makeError(MetaCode::kNoPermission, msg);
  }

  inode.nlink--;
  SetAttr::update(inode.ctime, UtcClock::now(), config_.time_granularity(), true /* cmp */);
  const bool lastLink = inode.nlink == 0;

  // add into read conflict set
  CO_RETURN_ON_ERROR(co_await inode.addIntoReadConflict(txn));
  CO_RETURN_ON_ERROR(co_await entry.addIntoReadConflict(txn));

  if (entry.isSymlink()) {
    // remove symlink: no GC needed, the inode is dropped right away when unreferenced
    CO_RETURN_ON_ERROR(co_await entry.remove(txn));
    if (lastLink) {
      CO_RETURN_ON_ERROR(co_await inode.remove(txn));
    } else {
      CO_RETURN_ON_ERROR(co_await inode.store(txn));
    }
    co_return Void{};
  }

  // remove directory or file
  CO_RETURN_ON_ERROR(co_await inode.store(txn));
  CO_RETURN_ON_ERROR(co_await entry.remove(txn));

  if (inode.isDirectory()) {
    // A directory must have exactly one link and must live under the entry's parent.
    if (!lastLink) {
      XLOGF(DFATAL, "Directory {} nlink != 0", inode);
      co_return makeError(MetaCode::kFoundBug);
    }
    if (inode.asDirectory().parent != entry.parent) {
      XLOGF(DFATAL, "Directory inode {}, entry {}, parent not match", inode.asDirectory(), entry);
      co_return makeError(MetaCode::kFoundBug);
    }
  }

  if (lastLink) {
    auto gcDirectory = pickGcDirectory();
    CO_RETURN_ON_ERROR(co_await gcDirectory->add(txn, inode, config_.gc(), gcInfo));
  } else {
    // this is not last reference, can't remove
    XLOGF(DBG, "Inode {} has {} links after remove {}", inode.id, inode.nlink, entry);
  }

  co_return Void{};
}
|
||||
|
||||
/// Rebuilds `allGcDirectories_` from the entries found under the GC root.
///
/// An entry counts as active when it is one of this node's own GC directories,
/// or when its name matches directory 1..kNumGcDirectoryPerServer-1 of an
/// active META node from the current routing info. The process aborts if one
/// of this node's own directories is missing from the active set.
///
/// @return Void on success, or the error of a failed listing transaction.
CoTryTask<void> GcManager::scanAllGcDirectories() {
  // List everything under the GC root, 128 entries per transaction.
  std::vector<DirEntry> entries;
  for (bool more = true; more;) {
    auto page = co_await runReadOnly([&](auto &txn) -> CoTryTask<DirEntryList> {
      auto prev = entries.empty() ? "" : entries.back().name;
      co_return co_await DirEntryList::snapshotLoad(txn, InodeId::gcRoot(), prev, 128);
    });
    CO_RETURN_ON_ERROR(page);
    entries.insert(entries.end(), page->entries.begin(), page->entries.end());
    more = page->more;
  }

  // Names of the GC directories owned by active meta nodes.
  std::map<std::string, flat::NodeId> active;
  if (auto routing = mgmtd_->getRoutingInfo(); routing) {
    auto metaNodes = routing->getNodeBy(flat::selectNodeByType(flat::NodeType::META) && flat::selectActiveNode());
    for (auto &node : metaNodes) {
      // skip GC directory 0
      for (size_t idx = 1; idx < kNumGcDirectoryPerServer; idx++) {
        active.insert_or_assign(GcDirectory::nameOf(node.app.nodeId, idx), node.app.nodeId);
      }
    }
  }

  auto ownedByThisNode = [&](const DirEntry &candidate) {
    return std::any_of(currGcDirectories_.begin(), currGcDirectories_.end(), [&](const auto &gcDir) {
      return candidate.name == gcDir->name();
    });
  };

  std::vector<std::shared_ptr<GcDirectory>> activeDirs;
  std::set<std::string> activeNames, inactiveNames;
  for (auto &entry : entries) {
    if (!active.contains(entry.name) && !ownedByThisNode(entry)) {
      inactiveNames.insert(entry.name);
      continue;
    }
    activeDirs.emplace_back(std::make_shared<GcDirectory>(entry));
    activeNames.insert(entry.name);
  }

  XLOGF(INFO,
        "GcManager found {} GC directories, active {}, inactive {}",
        entries.size(),
        fmt::join(activeNames.begin(), activeNames.end(), ","),
        fmt::join(inactiveNames.begin(), inactiveNames.end(), ","));
  for (const auto &own : currGcDirectories_) {
    XLOGF_IF(FATAL, !activeNames.contains(own->name()), "Current GC Directory {} not found under GcRoot", own->name());
  }
  allGcDirectories_.withWLock([&](auto &dirs) { dirs = activeDirs; });

  co_return Void{};
}
|
||||
|
||||
bool GcManager::enableGcDelay() const {
|
||||
if (auto fsStatus = fileHelper_->cachedFsStatus(); fsStatus.has_value()) {
|
||||
auto free = (double)fsStatus->free / fsStatus->capacity * 100;
|
||||
if (free < config_.gc().gc_delay_free_space_threshold()) {
|
||||
XLOGF_EVERY_MS(WARN,
|
||||
5000,
|
||||
"free space {} < {}, disable GC delay",
|
||||
free,
|
||||
config_.gc().gc_delay_free_space_threshold());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
XLOGF_EVERY_MS(WARN, 5000, "GcManager failed to get FsStatus");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Worker entry point: executes one GC task and records the outcome in the
/// GC metrics. The task is always reported back to its GC directory through
/// finish(), even when GC is disabled or the task fails.
///
/// @param task  task to run (taken by value).
CoTask<void> GcManager::runGcTask(GcTask task) {
  SCOPE_EXIT { task.gcDir->finish(task); };

  if (!config_.gc().enable()) {
    co_return;
  }

  const auto startedAt = SteadyClock::now();
  auto outcome = co_await task.run(*this);

  if (!outcome.hasError()) {
    XLOGF(DBG, "GC {} success", task.taskEntry);
    gcSuccCount.addSample(1, {{"instance", task.taskEntry.isDirectory() ? "directory" : "file"}});
    // Latency is only sampled for successful tasks.
    gcLatency.addSample(SteadyClock::now() - startedAt);
  } else {
    XLOGF(ERR, "GC {} failed, error {}", task.taskEntry.id, outcome.error());
    gcFailCount.addSample(1);
  }

  co_return;
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
296
src/meta/components/GcManager.h
Normal file
296
src/meta/components/GcManager.h
Normal file
@@ -0,0 +1,296 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <folly/Executor.h>
|
||||
#include <folly/Random.h>
|
||||
#include <folly/Synchronized.h>
|
||||
#include <folly/Utility.h>
|
||||
#include <folly/executors/CPUThreadPoolExecutor.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <gtest/gtest_prod.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "client/mgmtd/ICommonMgmtdClient.h"
|
||||
#include "client/mgmtd/MgmtdClient.h"
|
||||
#include "client/storage/StorageClient.h"
|
||||
#include "common/app/ApplicationBase.h"
|
||||
#include "common/app/NodeId.h"
|
||||
#include "common/kv/IKVEngine.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/BackgroundRunner.h"
|
||||
#include "common/utils/CPUExecutorGroup.h"
|
||||
#include "common/utils/ConfigBase.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/CoroutinesPool.h"
|
||||
#include "common/utils/CountDownLatch.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/PriorityCoroutinePool.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/Semaphore.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "core/user/UserStoreEx.h"
|
||||
#include "fbs/core/user/User.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fdb/FDBRetryStrategy.h"
|
||||
#include "fmt/core.h"
|
||||
#include "meta/base/Config.h"
|
||||
#include "meta/components/InodeIdAllocator.h"
|
||||
#include "meta/components/SessionManager.h"
|
||||
#include "meta/event/Event.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/FileSession.h"
|
||||
#include "scn/scan/scan.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class FileHelper;
|
||||
using hf3fs::client::ICommonMgmtdClient;
|
||||
|
||||
class GcManager {
|
||||
public:
|
||||
/// Parses a GC entry name of the form `<prefix>-<timestamp>-<inode>`, the
/// format produced by formatGcEntry().
///
/// @param entry  entry name to parse.
/// @return the timestamp (interpreted as microseconds) and the inode id, or
///         kInvalidArg when the name does not match the pattern. The prefix
///         character is parsed but not returned.
static Result<std::pair<UtcTime, InodeId>> parseGcEntry(std::string_view entry) {
  using Parsed = std::pair<UtcTime, InodeId>;

  char prefix = 0;
  uint64_t micros = 0;
  uint64_t rawInodeId = 0;
  auto scanned = scn::scan(entry, "{}-{}-{:i}", prefix, micros, rawInodeId);
  if (scanned) {
    return Parsed{UtcTime::fromMicroseconds(micros), InodeId(rawInodeId)};
  }
  return makeError(StatusCode::kInvalidArg);
}
|
||||
|
||||
static std::string formatGcEntry(char prefix, UtcTime timestamp, InodeId inode) {
|
||||
return fmt::format("{}-{:020d}-{}", prefix, (uint64_t)timestamp.toMicroseconds(), inode.toHexString());
|
||||
}
|
||||
|
||||
GcManager(const Config &config,
|
||||
flat::NodeId nodeId,
|
||||
analytics::StructuredTraceLog<MetaEventTrace> &metaEventTraceLog,
|
||||
std::shared_ptr<kv::IKVEngine> kvEngine,
|
||||
std::shared_ptr<ICommonMgmtdClient> mgmtd,
|
||||
std::shared_ptr<InodeIdAllocator> idAlloc,
|
||||
std::shared_ptr<FileHelper> fileHelper,
|
||||
std::shared_ptr<SessionManager> sessionManager,
|
||||
std::shared_ptr<core::UserStoreEx> userStore)
|
||||
: config_(config),
|
||||
nodeId_(nodeId),
|
||||
metaEventTraceLog_(metaEventTraceLog),
|
||||
kvEngine_(kvEngine),
|
||||
mgmtd_(mgmtd),
|
||||
idAlloc_(idAlloc),
|
||||
fileHelper_(fileHelper),
|
||||
sessionManager_(sessionManager),
|
||||
userStore_(userStore),
|
||||
concurrentGcDirSemaphore_(config_.gc().gc_directory_concurrent()),
|
||||
concurrentGcFileSemaphore_(config_.gc().gc_file_concurrent()) {
|
||||
XLOGF_IF(FATAL, !nodeId_, "invalid node id {}", nodeId_);
|
||||
guard_ = config_.gc().addCallbackGuard([&]() {
|
||||
auto dirConcurrent = config_.gc().gc_directory_concurrent();
|
||||
if (dirConcurrent != 0 && dirConcurrent != concurrentGcDirSemaphore_.getUsableTokens()) {
|
||||
XLOGF(INFO, "GcManager set gc directory concurrent to {}", dirConcurrent);
|
||||
concurrentGcDirSemaphore_.changeUsableTokens(dirConcurrent);
|
||||
XLOGF(INFO, "GcManager finished update gc directory concurrent");
|
||||
}
|
||||
auto fileConcurrent = config_.gc().gc_file_concurrent();
|
||||
if (fileConcurrent != 0 && fileConcurrent != concurrentGcFileSemaphore_.getUsableTokens()) {
|
||||
XLOGF(INFO, "GcManager set gc directory concurrent to {}", fileConcurrent);
|
||||
concurrentGcFileSemaphore_.changeUsableTokens(fileConcurrent);
|
||||
XLOGF(INFO, "GcManager finished update gc file concurrent");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
CoTryTask<void> init();
|
||||
|
||||
void start(CPUExecutorGroup &exec);
|
||||
void stopAndJoin();
|
||||
|
||||
// Returns a reference to the structured trace log passed to the constructor.
auto &getEventTraceLog() { return metaEventTraceLog_; }
|
||||
|
||||
CoTryTask<void> removeEntry(IReadWriteTransaction &txn, const DirEntry &entry, Inode &inode, GcInfo gcInfo);
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
FRIEND_TEST(TestRemove, GC);
|
||||
|
||||
// Kinds of GC entries. The numeric values index the per-type queue state
// array of GcDirectory, so they must stay dense and start at 0.
enum GcEntryType {
  DIRECTORY = 0,
  FILE_MEDIUM = 1,
  FILE_LARGE = 2,
  FILE_SMALL = 3,
  MAX = 4,  // number of kinds above; not a valid entry type itself
};
|
||||
|
||||
class GcDirectory;
|
||||
|
||||
struct GcTask {
|
||||
std::shared_ptr<GcDirectory> gcDir;
|
||||
GcEntryType type;
|
||||
DirEntry taskEntry;
|
||||
|
||||
GcTask(std::shared_ptr<GcDirectory> gcDir, GcEntryType type, DirEntry entry)
|
||||
: gcDir(std::move(gcDir)),
|
||||
type(type),
|
||||
taskEntry(std::move(entry)) {}
|
||||
|
||||
static CoTryTask<flat::UserAttr> getUserAttr(GcManager &manager, flat::Uid uid);
|
||||
|
||||
CoTryTask<void> run(GcManager &manager);
|
||||
CoTryTask<void> gcDirectory(GcManager &manager);
|
||||
CoTryTask<void> gcFile(GcManager &manager);
|
||||
|
||||
CoTryTask<void> removeEntry(GcManager &manager,
|
||||
IReadWriteTransaction &txn,
|
||||
const DirEntry &entry,
|
||||
const flat::UserAttr &user);
|
||||
CoTryTask<void> removeGcEntryAndInode(GcManager &manager, IReadWriteTransaction &txn);
|
||||
CoTryTask<DirEntry> createOrphanEntry(GcManager &manager,
|
||||
IReadWriteTransaction &txn,
|
||||
const DirEntry &entry,
|
||||
const Inode &inode,
|
||||
const flat::UserAttr &user);
|
||||
};
|
||||
|
||||
// A directory under the GC root that holds GC entries. Keeps one queue state
// per GcEntryType.
class GcDirectory : folly::MoveOnly, public std::enable_shared_from_this<GcDirectory> {
 public:
  using Ptr = std::shared_ptr<GcDirectory>;

  // Character used as the first component of GC entry names of the given type.
  // Aborts on an invalid type.
  static char prefixOf(GcEntryType type) {
    switch (type) {
      case DIRECTORY:
        return 'd';
      case FILE_MEDIUM:
        return 'f';
      case FILE_LARGE:
        return 'L';
      case FILE_SMALL:
        return 'S';
      default:
        XLOGF(FATAL, "invalid type {}", (int)type);
    }
  }

  // Executor priority for tasks of the given type: large files high, small
  // files low, everything else medium. Aborts on an invalid type.
  int8_t priorityOf(GcEntryType type) {
    switch (type) {
      case FILE_LARGE:
        return folly::Executor::HI_PRI;
      case DIRECTORY:
      case FILE_MEDIUM:
        return folly::Executor::MID_PRI;
      case FILE_SMALL:
        return folly::Executor::LO_PRI;
      default:
        XLOGF(FATAL, "invalid type {}", (int)type);
    }
  }

  // Name of GC directory `idx` of node `nodeId`: "GC-Node-<id>" for index 0,
  // "GC-Node-<id>.<idx>" otherwise.
  static std::string nameOf(flat::NodeId nodeId, size_t idx) {
    const auto id = static_cast<uint32_t>(nodeId);
    if (idx == 0) {
      return fmt::format("GC-Node-{}", id);
    }
    return fmt::format("GC-Node-{}.{}", id, idx);
  }

  GcDirectory(DirEntry entry)
      : entry_(std::move(entry)) {}

  // Stops the directory on destruction.
  ~GcDirectory() { stopAndJoin(); }

  void start(GcManager &manager, CPUExecutorGroup &exec);
  void stopAndJoin();

  // Inode id and name of this GC directory, taken from its entry.
  auto dirId() const { return entry_.id; }
  std::string name() const { return entry_.name; }

  CoTryTask<void> add(auto &txn, const Inode &inode, const GcConfig &config, GcInfo gcInfo);
  void finish(const GcTask &task);

  CoTryTask<void> moveToTail(auto &txn, const GcTask &task, Duration delay);

 private:
  // Bookkeeping for one entry type.
  struct QueueState {
    folly::Synchronized<std::set<InodeId>, std::mutex> queued;
    folly::Synchronized<std::set<InodeId>, std::mutex> finished;
    std::atomic<uint64_t> counter{0};
  };

  CoTask<void> scan(GcManager &manager, GcEntryType type);
  CoTryTask<bool> scan(GcManager &manager,
                       GcEntryType type,
                       Duration delay,
                       size_t limit,
                       std::optional<std::string> &prev);
  CoTryTask<void> addFile(auto &txn, const Inode &inode, const GcConfig &config);
  CoTryTask<void> addDirectory(auto &txn, const Inode &inode, GcInfo gcInfo);

  DirEntry entry_;  // entry points to this GcDirectory
  std::array<QueueState, GcEntryType::MAX> states_;  // indexed by GcEntryType
  CancellationSource cancel_;
  CountDownLatch<> latch_;
};
|
||||
|
||||
// Read-only access to currGcDirectories_ (filled by init() with this node's own GC directories).
const std::vector<GcDirectory::Ptr> &currGcDirectories() const { return currGcDirectories_; }
|
||||
|
||||
// Chooses the GC directory a newly removed inode is queued in.
//
// With `distributed_gc` enabled and a non-empty list of known directories, a
// random one of those is returned. Otherwise a random local directory is
// used; local directory 0 is only chosen when it is the only one. Aborts if
// there is no local directory at all.
GcDirectory::Ptr pickGcDirectory() {
  XLOGF_IF(FATAL, currGcDirectories_.empty(), "currGcDirectories_.empty()");

  if (config_.gc().distributed_gc()) {
    auto all = allGcDirectories_.rlock();
    if (!all->empty()) {
      return all->at(folly::Random::rand64(all->size()));
    }
  }

  const auto localCount = currGcDirectories_.size();
  if (localCount == 1) {
    return currGcDirectories_[0];
  }
  // Index range [1, localCount): skips directory 0.
  return currGcDirectories_[folly::Random::rand32(1, localCount)];
}
|
||||
|
||||
bool enableGcDelay() const;
|
||||
|
||||
CoTryTask<void> checkFs();
|
||||
CoTryTask<std::shared_ptr<GcDirectory>> openGcDirectory(size_t idx, bool create);
|
||||
CoTryTask<void> scanAllGcDirectories();
|
||||
|
||||
CoTask<void> runGcTask(GcTask task);
|
||||
|
||||
template <typename H>
|
||||
std::invoke_result_t<H, IReadOnlyTransaction &> runReadOnly(H &&handler) {
|
||||
auto retry = kv::FDBRetryStrategy({1_s, 10, true});
|
||||
co_return co_await kv::WithTransaction(retry).run(kvEngine_->createReadonlyTransaction(), std::forward<H>(handler));
|
||||
}
|
||||
|
||||
template <typename H>
|
||||
std::invoke_result_t<H, IReadWriteTransaction &> runReadWrite(H &&handler) {
|
||||
auto retry = kv::FDBRetryStrategy({1_s, 10, true});
|
||||
co_return co_await kv::WithTransaction(retry).run(kvEngine_->createReadWriteTransaction(),
|
||||
std::forward<H>(handler));
|
||||
}
|
||||
|
||||
const Config &config_;
|
||||
std::unique_ptr<config::ConfigCallbackGuard> guard_;
|
||||
flat::NodeId nodeId_;
|
||||
analytics::StructuredTraceLog<MetaEventTrace> &metaEventTraceLog_;
|
||||
std::shared_ptr<kv::IKVEngine> kvEngine_;
|
||||
std::shared_ptr<ICommonMgmtdClient> mgmtd_;
|
||||
std::shared_ptr<InodeIdAllocator> idAlloc_;
|
||||
std::shared_ptr<FileHelper> fileHelper_;
|
||||
std::shared_ptr<SessionManager> sessionManager_;
|
||||
std::shared_ptr<core::UserStoreEx> userStore_;
|
||||
|
||||
std::vector<GcDirectory::Ptr> currGcDirectories_;
|
||||
folly::Synchronized<std::vector<GcDirectory::Ptr>> allGcDirectories_;
|
||||
|
||||
std::unique_ptr<BackgroundRunner> gcRunner_;
|
||||
std::unique_ptr<PriorityCoroutinePool<GcTask>> gcWorkers_;
|
||||
Semaphore concurrentGcDirSemaphore_;
|
||||
Semaphore concurrentGcFileSemaphore_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
73
src/meta/components/InodeIdAllocator.cc
Normal file
73
src/meta/components/InodeIdAllocator.cc
Normal file
@@ -0,0 +1,73 @@
|
||||
#include "meta/components/InodeIdAllocator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/ScopeGuard.h>
|
||||
#include <folly/Unit.h>
|
||||
#include <folly/experimental/coro/CurrentExecutor.h>
|
||||
#include <folly/experimental/coro/Promise.h>
|
||||
#include <folly/experimental/coro/Sleep.h>
|
||||
#include <folly/experimental/coro/Timeout.h>
|
||||
#include <folly/io/async/Request.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
|
||||
#include "common/kv/KeyPrefix.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/FaultInjection.h"
|
||||
#include "common/utils/Result.h"
|
||||
|
||||
#define FAULT_INJECTION_INODE_ID_ALLOCATOR true
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
// Key prefix handed to the underlying IdAllocator: the string form of kv::KeyPrefix::Single followed by
// "-inode-alloc".
std::string InodeIdAllocator::kAllocatorKeyPrefix =
    fmt::format("{}-inode-alloc", kv::toStr(kv::KeyPrefix::Single));
|
||||
|
||||
// Slow path used when no id is immediately available: make sure a refill task is running, then wait on
// queue_ for at most `timeout`. Any exception surfaced by the timed dequeue is reported as
// MetaCode::kInodeIdAllocFailed.
CoTryTask<meta::InodeId> InodeIdAllocator::allocateSlow(std::chrono::microseconds timeout) {
  tryStartAllocateTask(co_await folly::coro::co_current_executor);

  auto dequeued = co_await folly::coro::co_awaitTry(folly::coro::timeout(queue_.dequeue(), timeout));
  if (UNLIKELY(dequeued.hasException())) {
    co_return makeError(MetaCode::kInodeIdAllocFailed);
  }

  // Less than half a batch left: request another refill before handing the id back.
  const bool runningLow = queue_.size() < kAllocateBatch / 2;
  if (UNLIKELY(runningLow)) {
    tryStartAllocateTask(co_await folly::coro::co_current_executor);
  }

  co_return dequeued.value();
}
|
||||
|
||||
// Fetch one value from allocator_ and expand it into kAllocateBatch consecutive InodeIds pushed onto queue_.
//
// On failure the error is logged and a fresh allocate task is scheduled; allocating_ is left untouched on
// that path. On success allocating_ is cleared once the whole batch is queued, and another refill is
// requested if the queue is still below half a batch.
CoTask<void> InodeIdAllocator::allocateFromDB() {
  auto allocated = co_await allocator_.allocate();
  if (UNLIKELY(allocated.hasError())) {
    XLOGF(CRITICAL, "Failed to allocate InodeId {}", allocated.error().describe());
    // allocation failed, wait sometime and retry in new task
    // NOTE(review): startAllocateTask() as declared in the header schedules allocateTask() without a delay,
    // so no explicit wait happens here — confirm whether a delay was intended.
    startAllocateTask(co_await folly::coro::co_current_executor);
    co_return;
  }

  // Only the low kAllocatorBit bits may be set, otherwise the shift below would drop information.
  const auto high = allocated.value();
  if (UNLIKELY((high & ~kAllocatorMask) != 0)) {
    XLOGF(FATAL, "64bit InodeId used up, should never happen, {}!!!", high);
  }

  const auto first = high << kAllocatorShift;
  XLOGF(DBG,
        "Get {} from IdAllocator, corresponding to InodeId {} - {}",
        high,
        meta::InodeId(first),
        meta::InodeId(first + kAllocateBatch - 1));

  for (uint64_t offset = 0; offset < kAllocateBatch; ++offset) {
    meta::InodeId next(first + offset);
    co_await queue_.enqueue(next);
  }
  allocating_.store(false);

  const bool stillLow = queue_.size() < kAllocateBatch / 2;
  if (UNLIKELY(stillLow)) {
    tryStartAllocateTask(co_await folly::coro::co_current_executor);
  }
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
134
src/meta/components/InodeIdAllocator.h
Normal file
134
src/meta/components/InodeIdAllocator.h
Normal file
@@ -0,0 +1,134 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstddef>
|
||||
#include <deque>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/Executor.h>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/Range.h>
|
||||
#include <folly/Synchronized.h>
|
||||
#include <folly/ThreadLocal.h>
|
||||
#include <folly/Utility.h>
|
||||
#include <folly/experimental/coro/Baton.h>
|
||||
#include <folly/experimental/coro/BoundedQueue.h>
|
||||
#include <folly/experimental/coro/CurrentExecutor.h>
|
||||
#include <folly/experimental/coro/Mutex.h>
|
||||
#include <folly/experimental/coro/Promise.h>
|
||||
#include <folly/experimental/coro/Sleep.h>
|
||||
#include <folly/experimental/coro/Timeout.h>
|
||||
#include <folly/fibers/BatchSemaphore.h>
|
||||
#include <folly/io/async/Request.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
#include "common/kv/IKVEngine.h"
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/FaultInjection.h"
|
||||
#include "common/utils/IdAllocator.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fdb/FDBRetryStrategy.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/**
|
||||
* Generate InodeId range from 0x00000000_00001000 to 0x01ffffff_ffffffff.
|
||||
*
|
||||
* Generated InodeId format: [ high 52bits: generated by IdAllocator ][ low 12 bits: local generated ].
|
||||
* InodeIdAllocator first use IdAllocator to generate a 52bits value, then left shift 12 bits and generate lower 12 bits
|
||||
* locally. So it only need to access the FoundationDB after generate 4096 InodeIds.
|
||||
*/
|
||||
class InodeIdAllocator : public std::enable_shared_from_this<InodeIdAllocator> {
|
||||
// These values are used in FoundationDB
|
||||
static std::string kAllocatorKeyPrefix;
|
||||
static constexpr size_t kAllocatorShard = 32; // avoid txn conflictation
|
||||
static constexpr uint64_t kAllocatorShift = 12; // shift 12 bit
|
||||
static constexpr uint64_t kAllocatorBit =
|
||||
64 - kAllocatorShift; // IdAllocator generated values only have 52bits valid.
|
||||
static constexpr uint64_t kAllocatorMask = (1ULL << kAllocatorBit) - 1;
|
||||
static constexpr uint64_t kAllocateBatch = 1 << kAllocatorShift;
|
||||
|
||||
struct Tag {};
|
||||
|
||||
public:
|
||||
InodeIdAllocator(Tag, std::shared_ptr<kv::IKVEngine> kvEngine)
|
||||
: engine_(std::move(kvEngine)),
|
||||
allocator_(*engine_, createRetryStrategy(), kAllocatorKeyPrefix, kAllocatorShard),
|
||||
allocating_(false),
|
||||
queue_(2 * kAllocateBatch) {}
|
||||
|
||||
static std::shared_ptr<InodeIdAllocator> create(std::shared_ptr<kv::IKVEngine> kvEngine) {
|
||||
return std::make_shared<InodeIdAllocator>(Tag{}, std::move(kvEngine));
|
||||
}
|
||||
|
||||
CoTryTask<InodeId> allocate(std::chrono::microseconds timeout = std::chrono::seconds(2)) {
|
||||
static monitor::CountRecorder failed("meta_inodeid_alloc_failed");
|
||||
|
||||
auto id = queue_.try_dequeue();
|
||||
if (LIKELY(id.has_value())) {
|
||||
if (queue_.size() < kAllocateBatch / 2) {
|
||||
tryStartAllocateTask(co_await folly::coro::co_current_executor);
|
||||
}
|
||||
co_return id.value();
|
||||
}
|
||||
auto result = co_await allocateSlow(timeout);
|
||||
if (result.hasError()) {
|
||||
failed.addSample(1);
|
||||
co_return result;
|
||||
}
|
||||
if (result->u64() >= InodeId::kNewChunkEngineMask) {
|
||||
failed.addSample(1);
|
||||
XLOGF(DFATAL, "InodeId {} is larger than", *result, InodeId(InodeId::kNewChunkEngineMask));
|
||||
co_return makeError(MetaCode::kInodeIdAllocFailed, "InodeId too large, shouldn't happen");
|
||||
}
|
||||
co_return result;
|
||||
}
|
||||
|
||||
private:
|
||||
static kv::FDBRetryStrategy createRetryStrategy() { return kv::FDBRetryStrategy({.retryMaybeCommitted = true}); }
|
||||
|
||||
static CoTask<void> allocateTask(std::weak_ptr<InodeIdAllocator> weak,
|
||||
std::optional<folly::Duration> delay = std::nullopt) {
|
||||
if (delay.has_value()) {
|
||||
co_await folly::coro::sleep(delay.value());
|
||||
}
|
||||
|
||||
auto ptr = weak.lock();
|
||||
if (ptr) {
|
||||
co_await ptr->allocateFromDB();
|
||||
}
|
||||
co_return;
|
||||
}
|
||||
|
||||
void tryStartAllocateTask(folly::Executor *exec) {
|
||||
if (!allocating_.exchange(true)) {
|
||||
startAllocateTask(exec);
|
||||
}
|
||||
}
|
||||
|
||||
void startAllocateTask(folly::Executor *exec) {
|
||||
folly::RequestContextScopeGuard guard;
|
||||
allocateTask(weak_from_this()).scheduleOn(exec).start();
|
||||
}
|
||||
|
||||
CoTryTask<InodeId> allocateSlow(std::chrono::microseconds timeout);
|
||||
CoTask<void> allocateFromDB();
|
||||
|
||||
std::shared_ptr<kv::IKVEngine> engine_;
|
||||
IdAllocator<kv::FDBRetryStrategy> allocator_;
|
||||
std::atomic<bool> allocating_;
|
||||
folly::coro::BoundedQueue<InodeId, false, false> queue_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
334
src/meta/components/SessionManager.cc
Normal file
334
src/meta/components/SessionManager.cc
Normal file
@@ -0,0 +1,334 @@
|
||||
#include "meta/components/SessionManager.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <fmt/format.h>
|
||||
#include <folly/experimental/coro/BlockingWait.h>
|
||||
#include <folly/functional/Partial.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/app/ApplicationBase.h"
|
||||
#include "common/app/NodeId.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/WithTransaction.h"
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/BackgroundRunner.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/OptionalUtils.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "common/utils/Uuid.h"
|
||||
#include "fbs/core/user/User.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "fbs/meta/Utils.h"
|
||||
#include "fdb/FDBRetryStrategy.h"
|
||||
#include "meta/components/FileHelper.h"
|
||||
#include "meta/store/FileSession.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
namespace {
|
||||
|
||||
monitor::CountRecorder pruned("meta_server.sessions_pruned");
|
||||
monitor::CountRecorder pruneFailed("meta_server.sessions_prune_failed");
|
||||
|
||||
// Ask mgmtd for its client sessions and return the set of client ids considered active.
//
// Fails with the mgmtd error when listing fails, and with MgmtdClientCode::kRoutingInfoNotReady when mgmtd
// reports it is bootstrapping and `allowBootstrapping` is false. Sessions whose id cannot be parsed, whose id
// is the zero uuid, or (when `timeout` is given) whose last extension is older than timeout + 10s are left
// out of the result.
CoTryTask<std::set<ClientId>> getActiveClients(client::ICommonMgmtdClient &mgmtd,
                                               bool allowBootstrapping,
                                               std::optional<Duration> timeout = std::nullopt) {
  auto listed = co_await mgmtd.listClientSessions();
  if (listed.hasError()) {
    XLOGF(ERR, "Failed to list active clients, error {}", listed.error());
    CO_RETURN_ERROR(listed);
  }
  if (!allowBootstrapping && listed->bootstrapping) {
    XLOGF(INFO, "Failed to list active clients, mgmtd is bootstrapping.");
    co_return makeError(MgmtdClientCode::kRoutingInfoNotReady);
  }

  std::set<ClientId> alive;
  for (const auto &session : listed->sessions) {
    auto parsed = Uuid::fromHexString(session.clientId);
    if (parsed.hasError()) {
      XLOGF(DFATAL, "Failed to parse client {} id {}, error {}", session.description, session.clientId, parsed.error());
      continue;
    }
    if (*parsed == Uuid::zero()) {
      XLOGF(DFATAL, "Client {} uuid {} is zero", session.description, session.clientId);
      continue;
    }
    // 10s of slack is added on top of the configured timeout before a client is treated as expired.
    const bool expired = timeout.has_value() && session.lastExtend + *timeout + 10_s < UtcClock::now();
    if (expired) {
      XLOGF(WARN, "Client {} timeout, last extended {}, ", session.description, session.lastExtend);
      continue;
    }
    alive.emplace(*parsed);
  }
  co_return alive;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/** SessionManager::ScanTask */
|
||||
// Scan every file session of shard `shard_` and prune the dead ones.
//
// A session is treated as dead when it is listed in prune_->sessions, or when its client is not in the active
// client set and the session is at least one minute older than the start of this scan. Dead sessions are
// either handed to manager.closeWorkers_ (sync_on_prune_session) or removed directly in one read-write
// transaction per scanned batch.
//
// The task that brings prune_->finished up to FileSession::kShard additionally removes whatever is still left
// in prune_->sessions, in batches of 64; failures there are only logged and counted.
//
// Returns the number of sessions removed directly by the per-batch transactions. Sessions handed to the close
// workers and sessions removed in the final drain are not included. Returns an error as soon as listing
// clients, scanning, or a per-batch removal fails.
CoTryTask<size_t> SessionManager::ScanTask::run(SessionManager &manager) {
  XLOGF(DBG, "ScanTask-{} start", shard_);

  // get all active clients
  auto ts = UtcClock::now();
  auto active = co_await getActiveClients(*manager.mgmtd_, false, manager.config_.session_timeout());
  CO_RETURN_ON_ERROR(active);

  size_t total = 0;
  std::optional<FileSession> prev;
  while (true) {
    // scan sessions, resuming after the last session of the previous batch
    auto txn = manager.kvEngine_->createReadonlyTransaction();
    auto sessions = co_await kv::WithTransaction(kv::FDBRetryStrategy{})
                        .run(std::move(txn), [&](auto &txn) -> CoTryTask<std::vector<FileSession>> {
                          co_return co_await FileSession::scan(txn, shard_, prev);
                        });
    CO_RETURN_ON_ERROR(sessions);
    if (sessions->empty()) {
      break;
    }

    // filter dead sessions
    std::vector<FileSession> deadSessions;
    for (auto &session : *sessions) {
      if (prune_->sessions.rlock()->contains(session.sessionId)) {
        // need prune this session
        XLOGF(INFO, "Need prune session {}", session);
        prune_->sessions.wlock()->erase(session.sessionId);
      } else {
        // check client is active or not
        if (active->contains(session.clientId)) {
          continue;
        }
        if (session.timestamp + 1_min > ts) {
          // concurrent create session and scan
          auto now = UtcClock::now();
          XLOGF_IF(WARN, session.timestamp > now + 5_s, "Session timestamp {} > now {}", session.timestamp, now);
          continue;
        }
        XLOGF(WARN, "SessionManager found dead session {}", session);
      }
      deadSessions.push_back(session);
    }
    prev = sessions->back();

    // prune dead sessions
    if (manager.config_.sync_on_prune_session()) {
      for (auto &session : deadSessions) {
        co_await manager.closeWorkers_->enqueue(std::make_unique<CloseTask>(session));
      }
    } else {
      auto txn = manager.kvEngine_->createReadWriteTransaction();
      auto result =
          co_await kv::WithTransaction(kv::FDBRetryStrategy{}).run(std::move(txn), [&](auto &txn) -> CoTryTask<Void> {
            for (auto &session : deadSessions) {
              CO_RETURN_ON_ERROR(co_await session.remove(txn));
            }
            co_return Void{};
          });
      if (result.hasError()) {
        pruneFailed.addSample(deadSessions.size());
        XLOGF(ERR, "ScanTask-{} prune failed, error {}", shard_, result.error());
        CO_RETURN_ERROR(result);
      }
      total += deadSessions.size();
      pruned.addSample(deadSessions.size());
    }
  }

  // Only the task that completes the last shard continues to the final drain below.
  auto finished = prune_->finished.fetch_add(1) + 1;
  if (finished < FileSession::kShard) {
    co_return total;
  }

  while (!prune_->sessions.rlock()->empty()) {
    static constexpr size_t kBatch = 64;
    std::vector<FileSession> batch;
    batch.reserve(kBatch);
    // Move up to kBatch entries out of the shared map, then release the lock before touching the KV store.
    auto wlock = prune_->sessions.wlock();
    auto iter = wlock->begin();
    while (iter != wlock->end() && batch.size() < kBatch) {
      XLOGF(INFO, "Need prune session {}", iter->second);
      batch.push_back(iter->second);
      iter = wlock->erase(iter);
    }
    wlock.unlock();
    auto txn = manager.kvEngine_->createReadWriteTransaction();
    auto prune = co_await kv::WithTransaction(kv::FDBRetryStrategy{})
                     .run(std::move(txn), [&](IReadWriteTransaction &txn) -> CoTryTask<Void> {
                       for (auto &session : batch) {
                         CO_RETURN_ON_ERROR(co_await session.remove(txn));
                       }
                       co_return Void{};
                     });
    if (prune.hasError()) {
      pruneFailed.addSample(batch.size());
      XLOGF(WARN, "Prune session failed, error {}", prune.error());
    }
  }

  co_return total;
}
|
||||
|
||||
/** SessionManager::CloseTask */
|
||||
// Close the file behind session_ through manager.close_ with pruneSession set. If the close succeeds nothing
// else is done. If it fails, the session record is removed directly in its own transaction and the outcome of
// that removal is returned.
// NOTE(review): when the close fails with a code other than kNotFound and the removal fails as well,
// pruneFailed is incremented twice for the same session — confirm that is the intended accounting.
CoTryTask<void> SessionManager::CloseTask::run(SessionManager &manager) {
  XLOGF_IF(FATAL, !manager.close_, "close_ not set");

  auto req = CloseReq({}, session_.inodeId, SessionInfo(session_.clientId, session_.sessionId), true, {}, {});
  req.client = session_.clientId;
  req.pruneSession = true;

  auto closed = co_await manager.close_(req);
  if (!closed.hasError()) {
    pruned.addSample(1);
    co_return Void{};
  }

  const bool notFound = closed.error().code() == MetaCode::kNotFound;
  if (notFound) {
    pruned.addSample(1);
  } else {
    XLOGF(ERR, "SessionManager failed to close {}, error {}", req.inode, closed.error());
    pruneFailed.addSample(1);
  }

  // Fall back to deleting the session record itself.
  auto txn = manager.kvEngine_->createReadWriteTransaction();
  auto prune = co_await kv::WithTransaction(kv::FDBRetryStrategy{})
                   .run(std::move(txn), folly::partial(&FileSession::remove, &session_));
  if (prune.hasError()) {
    pruneFailed.addSample(1);
    XLOGF(WARN, "Prune session {} failed, error {}", session_, prune.error());
  }
  co_return prune;
}
|
||||
|
||||
/** SessionManager */
|
||||
|
||||
// Create and start the close-worker pool, the scan-worker pool and the periodic scan runner, in that order.
// Each pooled task is executed by calling its run() with this manager.
void SessionManager::start(CPUExecutorGroup &exec) {
  XLOGF(DBG, "SessionManager start");

  using ClosePool = CoroutinesPool<std::unique_ptr<CloseTask>>;
  using ScanPool = CoroutinesPool<std::unique_ptr<ScanTask>>;

  closeWorkers_ = std::make_unique<ClosePool>(config_.close_workers());
  closeWorkers_->start([this](auto task) -> CoTask<void> { co_await task->run(*this); }, exec);

  scanWorkers_ = std::make_unique<ScanPool>(config_.scan_workers());
  scanWorkers_->start([this](auto task) -> CoTask<void> { co_await task->run(*this); }, exec);

  // scanTask() is re-run by the background runner at the interval supplied by the config getter.
  scanRunner_ = std::make_unique<BackgroundRunner>(&exec.pickNext());
  scanRunner_->start("SessionScan", folly::partial(&SessionManager::scanTask, this), config_.scan_interval_getter());

  XLOGF(INFO, "SessionManager started!");
}
|
||||
|
||||
void SessionManager::stopAndJoin() {
|
||||
XLOGF(DBG, "SessionManager stop.");
|
||||
if (scanRunner_) {
|
||||
folly::coro::blockingWait(scanRunner_->stopAll());
|
||||
scanRunner_.reset();
|
||||
}
|
||||
if (scanWorkers_) {
|
||||
scanWorkers_->stopAndJoin();
|
||||
scanWorkers_.reset();
|
||||
}
|
||||
if (closeWorkers_) {
|
||||
closeWorkers_->stopAndJoin();
|
||||
closeWorkers_.reset();
|
||||
}
|
||||
|
||||
XLOGF(INFO, "SessionManager stopped.");
|
||||
}
|
||||
|
||||
// One round of the periodic scan: does nothing when scanning is disabled or when isFirstMeta() is false for
// this node; otherwise loads the sessions marked for pruning and enqueues one ScanTask per session shard.
CoTask<void> SessionManager::scanTask() {
  if (!config_.enable()) {
    XLOGF_EVERY_MS(INFO, 10000, "SessionManager scan disabled");
    co_return;
  }
  if (!isFirstMeta(*mgmtd_, nodeId_)) {
    co_return;
  }
  XLOGF(INFO, "MetaServer {} is first active meta, scan sessions", nodeId_);

  auto prune = co_await loadPrune();
  if (!prune) {
    XLOGF(ERR, "Failed to load sessions need to be pruned");
    co_return;
  }

  // All shard tasks share the same PruneSessions object.
  for (size_t shard = 0; shard < FileSession::kShard; ++shard) {
    auto task = std::make_unique<ScanTask>(shard, *prune);
    co_await scanWorkers_->enqueue(std::move(task));
  }
}
|
||||
|
||||
// Read the sessions recorded for pruning (FileSession::listPrune, capped at 128k entries) in a read-only
// transaction and return them in a fresh PruneSessions object keyed by session id.
CoTryTask<std::shared_ptr<SessionManager::PruneSessions>> SessionManager::loadPrune() {
  auto listed = co_await kv::WithTransaction(kv::FDBRetryStrategy{})
                    .run(kvEngine_->createReadonlyTransaction(), [&](auto &txn) -> CoTryTask<std::vector<FileSession>> {
                      co_return co_await FileSession::listPrune(txn, 128 << 10 /* at most 128k sessions to prune */);
                    });
  CO_RETURN_ON_ERROR(listed);
  XLOGF_IF(INFO, !listed->empty(), "SessionManager found {} sessions to prune", listed->size());

  auto prune = std::make_shared<PruneSessions>();
  auto locked = prune->sessions.wlock();
  for (auto &session : *listed) {
    locked->emplace(session.sessionId, session);
  }
  co_return prune;
}
|
||||
|
||||
// Collect every file session by walking all FileSession::kShard shards, paging through each shard with
// FileSession::scan until it returns an empty batch. Stops at the first scan error.
CoTryTask<std::vector<FileSession>> SessionManager::listSessions() {
  // todo: should we add this?
  std::vector<FileSession> all;
  for (size_t shard = 0; shard < FileSession::kShard; ++shard) {
    std::optional<FileSession> prev;  // last session of the previous page, used as the resume point
    for (;;) {
      auto page =
          co_await kv::WithTransaction(kv::FDBRetryStrategy{})
              .run(kvEngine_->createReadonlyTransaction(), [&](auto &txn) -> CoTryTask<std::vector<FileSession>> {
                co_return co_await FileSession::scan(txn, shard, prev);
              });
      CO_RETURN_ON_ERROR(page);
      if (page->empty()) {
        break;
      }
      prev = page->back();
      all.insert(all.end(), page->begin(), page->end());
    }
  }
  co_return all;
}
|
||||
|
||||
// List the sessions of a single inode via FileSession::list inside a read-only transaction.
CoTryTask<std::vector<FileSession>> SessionManager::listSessions(InodeId inodeId) {
  auto readTxn = kvEngine_->createReadonlyTransaction();
  auto listForInode = [&](IReadOnlyTransaction &txn) {
    return FileSession::list(txn, inodeId, true);
  };
  co_return co_await kv::WithTransaction<kv::FDBRetryStrategy>({}).run(std::move(readTxn), listForInode);
}
|
||||
|
||||
// Run a full prune pass inline: load the sessions marked for pruning, then run a ScanTask for every shard one
// after another and add up the counts they return. Stops at the first error.
CoTryTask<size_t> SessionManager::pruneManually() {
  XLOGF(INFO, "SessionManager pruneManually");

  auto prune = co_await loadPrune();
  CO_RETURN_ON_ERROR(prune);

  size_t total = 0;
  for (size_t shard = 0; shard < FileSession::kShard; ++shard) {
    ScanTask task(shard, *prune);
    auto shardTotal = co_await task.run(*this);
    CO_RETURN_ON_ERROR(shardTotal);
    total += *shardTotal;
  }
  co_return total;
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
145
src/meta/components/SessionManager.h
Normal file
145
src/meta/components/SessionManager.h
Normal file
@@ -0,0 +1,145 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/Executor.h>
|
||||
#include <folly/Synchronized.h>
|
||||
#include <folly/functional/Invoke.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "client/mgmtd/IMgmtdClientForServer.h"
|
||||
#include "common/app/ClientId.h"
|
||||
#include "common/app/NodeId.h"
|
||||
#include "common/kv/IKVEngine.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/KeyPrefix.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/BackgroundRunner.h"
|
||||
#include "common/utils/ConfigBase.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/CoroutinesPool.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/String.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "common/utils/Uuid.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "meta/store/FileSession.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class FileHelper;
|
||||
|
||||
class SessionManager {
|
||||
public:
|
||||
class Config : public ConfigBase<Config> {
|
||||
CONFIG_HOT_UPDATED_ITEM(enable, true);
|
||||
CONFIG_HOT_UPDATED_ITEM(scan_interval, 5_min);
|
||||
CONFIG_HOT_UPDATED_ITEM(scan_batch, 1024u);
|
||||
CONFIG_HOT_UPDATED_ITEM(sync_on_prune_session, false);
|
||||
CONFIG_HOT_UPDATED_ITEM(session_timeout, 5_min);
|
||||
CONFIG_OBJ(scan_workers, CoroutinesPoolBase::Config, [](auto &c) {
|
||||
c.set_coroutines_num(8);
|
||||
c.set_queue_size(128);
|
||||
});
|
||||
CONFIG_OBJ(close_workers, CoroutinesPoolBase::Config, [](auto &c) {
|
||||
c.set_coroutines_num(32);
|
||||
c.set_queue_size(1024);
|
||||
});
|
||||
};
|
||||
|
||||
SessionManager(const Config &cfg,
|
||||
flat::NodeId nodeId,
|
||||
std::shared_ptr<kv::IKVEngine> kvEngine,
|
||||
std::shared_ptr<client::ICommonMgmtdClient> mgmtd,
|
||||
std::shared_ptr<FileHelper> fileHelper)
|
||||
: config_(cfg),
|
||||
nodeId_(nodeId),
|
||||
kvEngine_(kvEngine),
|
||||
mgmtd_(mgmtd),
|
||||
fileHelper_(fileHelper) {}
|
||||
~SessionManager() { stopAndJoin(); }
|
||||
|
||||
void start(CPUExecutorGroup &exec);
|
||||
void stopAndJoin();
|
||||
|
||||
using CloseFunc = std::function<CoTryTask<void>(const meta::CloseReq &)>;
|
||||
void setCloseFunc(CloseFunc close) { close_ = close; }
|
||||
|
||||
// for admin_cli
|
||||
CoTryTask<std::vector<FileSession>> listSessions();
|
||||
CoTryTask<std::vector<FileSession>> listSessions(InodeId inodeId);
|
||||
CoTryTask<size_t> pruneManually();
|
||||
|
||||
private:
|
||||
struct PruneSessions {
|
||||
std::atomic<size_t> finished = 0;
|
||||
folly::Synchronized<std::map<Uuid, FileSession>> sessions;
|
||||
};
|
||||
|
||||
class ScanTask {
|
||||
public:
|
||||
ScanTask(size_t shard, std::shared_ptr<PruneSessions> prune)
|
||||
: shard_(shard),
|
||||
prune_(prune) {}
|
||||
|
||||
CoTryTask<size_t> run(SessionManager &manager);
|
||||
|
||||
private:
|
||||
size_t shard_ = -1;
|
||||
std::shared_ptr<PruneSessions> prune_;
|
||||
};
|
||||
|
||||
// try to close and sync
|
||||
class CloseTask {
|
||||
public:
|
||||
CloseTask(FileSession session)
|
||||
: session_(std::move(session)) {}
|
||||
CoTryTask<void> run(SessionManager &manager);
|
||||
|
||||
private:
|
||||
FileSession session_;
|
||||
};
|
||||
|
||||
CoTask<void> scanTask();
|
||||
CoTryTask<std::shared_ptr<PruneSessions>> loadPrune();
|
||||
|
||||
const Config &config_;
|
||||
flat::NodeId nodeId_;
|
||||
std::shared_ptr<kv::IKVEngine> kvEngine_;
|
||||
std::shared_ptr<client::ICommonMgmtdClient> mgmtd_;
|
||||
std::shared_ptr<FileHelper> fileHelper_;
|
||||
std::unique_ptr<BackgroundRunner> scanRunner_;
|
||||
std::unique_ptr<CoroutinesPool<std::unique_ptr<ScanTask>>> scanWorkers_;
|
||||
std::unique_ptr<CoroutinesPool<std::unique_ptr<CloseTask>>> closeWorkers_;
|
||||
CloseFunc close_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
|
||||
FMT_BEGIN_NAMESPACE
|
||||
|
||||
template <>
|
||||
struct formatter<hf3fs::meta::server::FileSession> : formatter<std::string_view> {
|
||||
template <typename FormatContext>
|
||||
auto format(const hf3fs::meta::server::FileSession &session, FormatContext &ctx) const {
|
||||
return format_to(ctx.out(),
|
||||
"{{inodeId {}, client {}, session {}}}",
|
||||
session.inodeId,
|
||||
session.clientId,
|
||||
session.sessionId);
|
||||
}
|
||||
};
|
||||
|
||||
FMT_END_NAMESPACE
|
||||
71
src/meta/event/Event.cc
Normal file
71
src/meta/event/Event.cc
Normal file
@@ -0,0 +1,71 @@
|
||||
#include "meta/event/Event.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <folly/json.h>
|
||||
#include <folly/logging/Logger.h>
|
||||
#include <folly/logging/LoggerDB.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
|
||||
#include "common/utils/MagicEnum.hpp"
|
||||
#include "common/utils/Result.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
namespace {
|
||||
folly::Logger create("eventlog.Create");
|
||||
folly::Logger mkdir("eventlog.Mkdir");
|
||||
folly::Logger hardLink("eventlog.HardLink");
|
||||
folly::Logger remove("eventlog.Remove");
|
||||
folly::Logger truncate("eventlog.Truncate");
|
||||
folly::Logger openWrite("eventlog.OpenWrite");
|
||||
folly::Logger closeWrite("eventlog.CloseWrite");
|
||||
folly::Logger rename("eventlog.Rename");
|
||||
folly::Logger symlink("eventlog.Symlink");
|
||||
folly::Logger gc("eventlog.GC");
|
||||
folly::Logger unknown("eventlog.Unknown");
|
||||
} // namespace
|
||||
|
||||
// Map an event type to its dedicated logger. A value outside the enumerators is reported with DFATAL and
// routed to the `unknown` logger.
static folly::Logger &getLogger(Event::Type type) {
  folly::Logger *selected = nullptr;
  switch (type) {
    case Event::Type::Create:
      selected = &create;
      break;
    case Event::Type::Mkdir:
      selected = &mkdir;
      break;
    case Event::Type::HardLink:
      selected = &hardLink;
      break;
    case Event::Type::Remove:
      selected = &remove;
      break;
    case Event::Type::Truncate:
      selected = &truncate;
      break;
    case Event::Type::OpenWrite:
      selected = &openWrite;
      break;
    case Event::Type::CloseWrite:
      selected = &closeWrite;
      break;
    case Event::Type::Rename:
      selected = &rename;
      break;
    case Event::Type::Symlink:
      selected = &symlink;
      break;
    case Event::Type::GC:
      selected = &gc;
      break;
  }
  if (selected != nullptr) {
    return *selected;
  }
  XLOGF(DFATAL, "Unknown type {}", (int)type);
  return unknown;
}
|
||||
|
||||
void Event::log() const {
|
||||
folly::json::serialization_opts opts;
|
||||
opts.pretty_formatting = false;
|
||||
opts.sort_keys = false;
|
||||
return log(opts);
|
||||
}
|
||||
|
||||
// Serialize `data` to JSON with `opts` and write it at INFO level to the logger that belongs to `type`.
// A folly::json::print_error raised during serialization is caught and reported through XLOGF(ERR) instead
// of propagating to the caller.
void Event::log(const folly::json::serialization_opts &opts) const {
  try {
    auto msg = folly::json::serialize(data, opts);
    // Bind by reference: getLogger() returns a reference to a long-lived logger, no copy is needed.
    auto &logger = getLogger(type);
    FB_LOG(logger, INFO, msg);
  } catch (const folly::json::print_error &exception) {
    XLOGF(ERR, "Event failed to serialize to json, type {}, error {}", magic_enum::enum_name(type), exception.what());
  }
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
74
src/meta/event/Event.h
Normal file
74
src/meta/event/Event.h
Normal file
@@ -0,0 +1,74 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <folly/Utility.h>
|
||||
#include <folly/dynamic.h>
|
||||
#include <folly/hash/Checksum.h>
|
||||
#include <folly/json.h>
|
||||
#include <folly/logging/Logger.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <limits>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/MagicEnum.hpp"
|
||||
#include "common/utils/Path.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "fbs/mgmtd/MgmtdTypes.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
struct Event {
|
||||
enum class Type { Create, Mkdir, HardLink, Remove, Truncate, OpenWrite, CloseWrite, Rename, Symlink, GC };
|
||||
|
||||
Type type;
|
||||
folly::dynamic data;
|
||||
|
||||
Event(Type type)
|
||||
: Event(type, folly::dynamic::object()) {
|
||||
addField("event", magic_enum::enum_name(type));
|
||||
addField("ts", UtcClock::now().toMicroseconds());
|
||||
}
|
||||
Event(Type type, folly::dynamic data)
|
||||
: type(type),
|
||||
data(std::move(data)) {}
|
||||
|
||||
void log() const;
|
||||
void log(const folly::json::serialization_opts &opts) const;
|
||||
|
||||
Event &addField(folly::dynamic key, folly::dynamic value) {
|
||||
data.insert(std::move(key), std::move(value));
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
struct MetaEventTrace {
|
||||
SERDE_STRUCT_FIELD(eventType, Event::Type::Create);
|
||||
SERDE_STRUCT_FIELD(inodeId, InodeId());
|
||||
SERDE_STRUCT_FIELD(parentId, InodeId());
|
||||
SERDE_STRUCT_FIELD(entryName, std::string());
|
||||
SERDE_STRUCT_FIELD(dstParentId, InodeId());
|
||||
SERDE_STRUCT_FIELD(dstEntryName, std::string());
|
||||
SERDE_STRUCT_FIELD(ownerId, Uid(0));
|
||||
SERDE_STRUCT_FIELD(userId, Uid());
|
||||
SERDE_STRUCT_FIELD(client, ClientId{Uuid::zero()});
|
||||
SERDE_STRUCT_FIELD(tableId, flat::ChainTableId());
|
||||
SERDE_STRUCT_FIELD(inodeType, InodeType::File);
|
||||
SERDE_STRUCT_FIELD(nlink, uint16_t(0));
|
||||
SERDE_STRUCT_FIELD(length, uint64_t(0));
|
||||
SERDE_STRUCT_FIELD(truncateVer, uint64_t(0));
|
||||
SERDE_STRUCT_FIELD(dynStripe, uint32_t(0));
|
||||
SERDE_STRUCT_FIELD(oflags, OpenFlags());
|
||||
SERDE_STRUCT_FIELD(recursiveRemove, false);
|
||||
SERDE_STRUCT_FIELD(removedChunks, size_t(0));
|
||||
SERDE_STRUCT_FIELD(pruneSession, false);
|
||||
SERDE_STRUCT_FIELD(symLinkTarget, Path());
|
||||
SERDE_STRUCT_FIELD(origPath, Path());
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
285
src/meta/event/Scan.cc
Normal file
285
src/meta/event/Scan.cc
Normal file
@@ -0,0 +1,285 @@
|
||||
#include "meta/event/Scan.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <fmt/format.h>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/Synchronized.h>
|
||||
#include <folly/experimental/coro/AsyncGenerator.h>
|
||||
#include <folly/experimental/coro/Collect.h>
|
||||
#include <folly/experimental/coro/CurrentExecutor.h>
|
||||
#include <folly/experimental/coro/Invoke.h>
|
||||
#include <folly/futures/Future.h>
|
||||
#include <folly/init/Init.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/KeyPrefix.h"
|
||||
#include "common/logging/LogInit.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/ExponentialBackoffRetry.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "fdb/FDB.h"
|
||||
#include "fdb/FDBKVEngine.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Inode.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
// Builds the retry policy used for range reads from the scan options, which express the
// min / max / total waits as (possibly fractional) seconds.
static ExponentialBackoffRetry createBackoff(const MetaScan::Options &options) {
  // seconds -> whole milliseconds; the conversion to int truncates any sub-millisecond remainder.
  const auto toMillis = [](double seconds) {
    return std::chrono::milliseconds(static_cast<int>(seconds * 1000));
  };
  return ExponentialBackoffRetry(toMillis(options.backoff_min_wait),
                                 toMillis(options.backoff_max_wait),
                                 toMillis(options.backoff_total_wait));
}
|
||||
|
||||
// Fetches the next batch of this range through a snapshot read and advances the cursor.
// On success `hasMore` is copied from the reply and, if any pairs were returned, `begin` is moved
// to the key right after the last returned one so the next call resumes from there.
// On error the range is left untouched and the error is returned.
// NOTE(review): the booleans passed with `begin` / `end` look like inclusive flags
// (begin included, end excluded) - confirm against the transaction interface.
CoTryTask<kv::IReadOnlyTransaction::GetRangeResult> MetaScan::KeyRange::snapshotGetRange(kv::IReadOnlyTransaction &txn,
                                                                                         int32_t limit) {
  XLOGF(DBG, "MetaScan snapshotGetRange {}", describe());

  auto fetched = co_await txn.snapshotGetRange({begin, true}, {end, false}, limit);
  CO_RETURN_ON_ERROR(fetched);

  if (!fetched->kvs.empty()) {
    begin = kv::TransactionHelper::keyAfter(fetched->kvs.rbegin()->key);
  }
  hasMore = fetched->hasMore;

  co_return fetched;
}
|
||||
|
||||
std::vector<MetaScan::KeyRange> MetaScan::KeyRange::split(std::string prefix) {
|
||||
std::vector<KeyRange> ranges;
|
||||
unsigned char c = 0;
|
||||
do {
|
||||
std::string begin = prefix + (char)c;
|
||||
std::string end;
|
||||
if (c != 0xff) {
|
||||
end = prefix + (char)(c + 1);
|
||||
} else {
|
||||
end = kv::TransactionHelper::prefixListEndKey(prefix);
|
||||
}
|
||||
ranges.push_back({begin, end});
|
||||
|
||||
c += 1;
|
||||
} while (c != 0);
|
||||
|
||||
return ranges;
|
||||
}
|
||||
|
||||
MetaScan::MetaScan(Options options, std::shared_ptr<kv::IKVEngine> kvEngine)
|
||||
: options_(options),
|
||||
kvEngine_(kvEngine),
|
||||
exec_(std::pair<size_t, size_t>{options.threads, options.threads},
|
||||
std::make_shared<folly::NamedThreadFactory>("Scan")) {
|
||||
if (options_.threads < 0 || options_.coroutines < 0) {
|
||||
throw std::runtime_error("Invalid options, thread < 0 or coroutines < 0");
|
||||
}
|
||||
if (!kvEngine && options_.fdb_cluster_file.empty()) {
|
||||
throw std::runtime_error("Should set kvEngine or fdb cluster file");
|
||||
}
|
||||
if (!options_.logging.empty()) {
|
||||
XLOGF(INFO, "Setup log: {}", options_.logging);
|
||||
logging::initOrDie(options_.logging);
|
||||
}
|
||||
|
||||
createKVEngine();
|
||||
}
|
||||
|
||||
// Tears the scanner down: requests cancellation of any started background scan, stops the
// executor, and - only if this object started the FDB network thread - stops the network and
// joins that thread.
MetaScan::~MetaScan() {
  const auto cancelIfStarted = [](auto &task) {
    if (task.has_value()) {
      task->cancel.requestCancellation();
    }
  };
  cancelIfStarted(scanInodeTask_);
  cancelIfStarted(scanDirEntryTask_);

  exec_.stop();

  if (fdbNetwork_) {
    kv::fdb::DB::stopNetwork();
    fdbNetwork_->join();
  }
}
|
||||
|
||||
void MetaScan::createKVEngine() {
|
||||
if (kvEngine_) {
|
||||
return;
|
||||
}
|
||||
|
||||
kv::fdb::DB::selectAPIVersion(FDB_API_VERSION);
|
||||
auto error = kv::fdb::DB::setupNetwork();
|
||||
if (error) {
|
||||
throw std::runtime_error(fmt::format("Failed to setup fdb network, error {}", kv::fdb::DB::errorMsg(error)));
|
||||
}
|
||||
fdbNetwork_ = std::jthread([&]() { kv::fdb::DB::runNetwork(); });
|
||||
kvEngine_ = std::make_shared<kv::FDBKVEngine>(kv::fdb::DB(options_.fdb_cluster_file, true /* readonly */));
|
||||
}
|
||||
|
||||
std::vector<Inode> MetaScan::getInodes() {
|
||||
std::scoped_lock<std::mutex> lock(mutex_);
|
||||
|
||||
if (!scanInodeTask_) {
|
||||
scanInodeTask_.emplace(256);
|
||||
scanInodeTask_->future = scanInode(*scanInodeTask_).scheduleOn(&exec_).start();
|
||||
}
|
||||
return waitResult(scanInodeTask_);
|
||||
}
|
||||
|
||||
std::vector<DirEntry> MetaScan::getDirEntries() {
|
||||
std::scoped_lock<std::mutex> lock(mutex_);
|
||||
|
||||
if (!scanDirEntryTask_) {
|
||||
scanDirEntryTask_.emplace(256);
|
||||
scanDirEntryTask_->future = scanDirEntry(*scanDirEntryTask_).scheduleOn(&exec_).start();
|
||||
}
|
||||
return waitResult(scanDirEntryTask_);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> MetaScan::waitResult(std::optional<BackgroundTask<T>> &task) {
|
||||
std::vector<T> vec;
|
||||
while (true) {
|
||||
// dequeue
|
||||
while (true) {
|
||||
auto result = task->queue.try_dequeue();
|
||||
if (!result.has_value()) {
|
||||
break;
|
||||
}
|
||||
if (vec.empty()) {
|
||||
vec = std::move(*result);
|
||||
} else {
|
||||
vec.insert(vec.end(), std::make_move_iterator(result->begin()), std::make_move_iterator(result->end()));
|
||||
}
|
||||
}
|
||||
|
||||
// return items
|
||||
if (vec.size() > 64) {
|
||||
return vec;
|
||||
}
|
||||
if (task->future.isReady()) {
|
||||
if (!vec.empty()) {
|
||||
return vec;
|
||||
}
|
||||
if (task->future.valid()) {
|
||||
auto &result = task->future.result();
|
||||
if (result.value().hasError()) {
|
||||
throw std::runtime_error(result.value().error().describe());
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
std::this_thread::sleep_for(10_ms);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
CoTryTask<void> MetaScan::scanRange(KeyRange range, BackgroundTask<T> &task) {
|
||||
auto originRange = range;
|
||||
XLOGF(INFO, "Worker scan range {}", originRange.describe());
|
||||
|
||||
size_t total = 0;
|
||||
|
||||
auto txn = kvEngine_->createReadonlyTransaction();
|
||||
auto txnCreateTime = RelativeTime::now();
|
||||
while (range.hasMore) {
|
||||
auto retry = createBackoff(options_);
|
||||
while (true) {
|
||||
// todo: tune FDB transaction get range mode
|
||||
auto result = co_await range.snapshotGetRange(*txn, options_.items_per_getrange);
|
||||
|
||||
// handle error
|
||||
if (result.hasError()) {
|
||||
auto wait = retry.getWaitTime();
|
||||
if (wait.count() == 0) {
|
||||
XLOGF(ERR, "Failed to get range {} after retry {}ms", range.describe(), retry.getElapsedTime().count());
|
||||
CO_RETURN_ERROR(result);
|
||||
}
|
||||
if (result.error().code() == TransactionCode::kTooOld) {
|
||||
txnCreateTime = RelativeTime::now();
|
||||
txn->reset();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// deserialize and continue
|
||||
std::vector<T> items;
|
||||
items.reserve(result->kvs.size());
|
||||
for (auto [key, value] : result->kvs) {
|
||||
auto unpacked = T::newUnpacked(key, value);
|
||||
if (unpacked.hasError()) {
|
||||
// todo: maybe should return error here, or give caller a statistic data?
|
||||
XLOGF(FATAL, "Failed to deserialize key {:02x}, value {:02x}", fmt::join(key, ""), fmt::join(value, ""));
|
||||
} else {
|
||||
items.emplace_back(std::move(*unpacked));
|
||||
}
|
||||
}
|
||||
if (!items.empty()) {
|
||||
total += items.size();
|
||||
co_await folly::coro::co_withCancellation(task.cancel.getToken(), task.queue.enqueue(std::move(items)));
|
||||
}
|
||||
|
||||
// reset transaction to avoid transaction too old
|
||||
if (RelativeTime::now() - txnCreateTime > std::chrono::seconds(3)) {
|
||||
txnCreateTime = RelativeTime::now();
|
||||
txn->reset();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
XLOGF(INFO, "Worker finished scan range {}, found {} kvs", originRange.describe(), total);
|
||||
|
||||
co_return Void{};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
CoTryTask<void> MetaScan::scan(kv::KeyPrefix prefix, BackgroundTask<T> &task) {
|
||||
auto prefixStr = std::string(kv::toStr(prefix));
|
||||
auto ranges = KeyRange::split(prefixStr);
|
||||
folly::Synchronized<std::queue<KeyRange>, std::mutex> taskQueue;
|
||||
for (auto &range : ranges) {
|
||||
taskQueue.lock()->push(range);
|
||||
}
|
||||
|
||||
auto exec = co_await folly::coro::co_current_executor;
|
||||
std::vector<folly::SemiFuture<Result<Void>>> workers;
|
||||
for (auto i = 0; i < options_.coroutines; i++) {
|
||||
auto worker = folly::coro::co_invoke([&]() -> CoTryTask<void> {
|
||||
KeyRange range;
|
||||
while (true) {
|
||||
{
|
||||
auto guard = taskQueue.lock();
|
||||
if (guard->empty()) {
|
||||
co_return Void{};
|
||||
}
|
||||
range = guard->front();
|
||||
guard->pop();
|
||||
}
|
||||
auto result = co_await scanRange(range, task);
|
||||
CO_RETURN_ON_ERROR(result);
|
||||
}
|
||||
co_return Void{};
|
||||
});
|
||||
workers.push_back(std::move(worker).scheduleOn(exec).start());
|
||||
}
|
||||
|
||||
auto results = co_await folly::coro::collectAllRange(std::move(workers));
|
||||
for (auto result : results) {
|
||||
CO_RETURN_ON_ERROR(result);
|
||||
}
|
||||
|
||||
co_return Void{};
|
||||
}
|
||||
|
||||
// Background job behind getInodes(): scans all keys under kv::KeyPrefix::Inode into `task`.
CoTryTask<void> MetaScan::scanInode(BackgroundTask<Inode> &task) {
  co_return co_await scan<Inode>(kv::KeyPrefix::Inode, task);
}
|
||||
|
||||
// Background job behind getDirEntries(): scans all keys under kv::KeyPrefix::Dentry into `task`.
CoTryTask<void> MetaScan::scanDirEntry(BackgroundTask<DirEntry> &task) {
  co_return co_await scan<DirEntry>(kv::KeyPrefix::Dentry, task);
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
115
src/meta/event/Scan.h
Normal file
115
src/meta/event/Scan.h
Normal file
@@ -0,0 +1,115 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <exception>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/CancellationToken.h>
|
||||
#include <folly/Function.h>
|
||||
#include <folly/MPMCQueue.h>
|
||||
#include <folly/concurrency/UnboundedQueue.h>
|
||||
#include <folly/executors/CPUThreadPoolExecutor.h>
|
||||
#include <folly/experimental/coro/BoundedQueue.h>
|
||||
#include <folly/futures/Future.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
#include "common/kv/IKVEngine.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/KeyPrefix.h"
|
||||
#include "common/utils/ConfigBase.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Inode.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class MetaScan {
|
||||
public:
|
||||
struct Options {
|
||||
// scan options
|
||||
int threads = 4;
|
||||
int coroutines = 8;
|
||||
int items_per_getrange = -1;
|
||||
double backoff_min_wait = 0.1; // 100ms
|
||||
double backoff_max_wait = 5; // 5s
|
||||
double backoff_total_wait = 60; // 60s
|
||||
// log level
|
||||
std::string logging;
|
||||
// create FDB client with given config path
|
||||
std::string fdb_cluster_file;
|
||||
};
|
||||
|
||||
MetaScan(Options options,
|
||||
std::shared_ptr<kv::IKVEngine> kvEngine = {} /* create new fdb client if kvEngine is not set */);
|
||||
~MetaScan();
|
||||
|
||||
std::vector<Inode> getInodes();
|
||||
std::vector<DirEntry> getDirEntries();
|
||||
|
||||
kv::IKVEngine &kvEngine() { return *kvEngine_; }
|
||||
|
||||
private:
|
||||
struct KeyRange {
|
||||
std::string begin;
|
||||
std::string end;
|
||||
bool hasMore;
|
||||
|
||||
KeyRange()
|
||||
: begin(),
|
||||
end(),
|
||||
hasMore(false) {}
|
||||
KeyRange(std::string begin, std::string end)
|
||||
: begin(std::move(begin)),
|
||||
end(std::move(end)),
|
||||
hasMore(true) {}
|
||||
|
||||
static std::vector<KeyRange> split(std::string prefix);
|
||||
|
||||
CoTryTask<kv::IReadOnlyTransaction::GetRangeResult> snapshotGetRange(kv::IReadOnlyTransaction &txn, int32_t limit);
|
||||
|
||||
std::string describe() const {
|
||||
return fmt::format("[begin {:02x}, end {:02x}, hasMore {}]", fmt::join(begin, ""), fmt::join(end, ""), hasMore);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct BackgroundTask {
|
||||
folly::coro::BoundedQueue<std::vector<T>> queue;
|
||||
folly::SemiFuture<Result<Void>> future;
|
||||
folly::CancellationSource cancel;
|
||||
|
||||
BackgroundTask(size_t cap)
|
||||
: queue(cap),
|
||||
future(folly::SemiFuture<Result<Void>>::makeEmpty()),
|
||||
cancel() {}
|
||||
};
|
||||
|
||||
void createKVEngine();
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> waitResult(std::optional<BackgroundTask<T>> &task);
|
||||
|
||||
CoTryTask<void> scanInode(BackgroundTask<Inode> &task);
|
||||
CoTryTask<void> scanDirEntry(BackgroundTask<DirEntry> &task);
|
||||
|
||||
template <typename T>
|
||||
CoTryTask<void> scan(kv::KeyPrefix prefix, BackgroundTask<T> &task);
|
||||
|
||||
template <typename T>
|
||||
CoTryTask<void> scanRange(KeyRange range, BackgroundTask<T> &task);
|
||||
|
||||
std::mutex mutex_;
|
||||
Options options_;
|
||||
std::optional<std::jthread> fdbNetwork_;
|
||||
std::shared_ptr<kv::IKVEngine> kvEngine_;
|
||||
folly::CPUThreadPoolExecutor exec_;
|
||||
std::optional<BackgroundTask<Inode>> scanInodeTask_;
|
||||
std::optional<BackgroundTask<DirEntry>> scanDirEntryTask_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
8
src/meta/meta.cpp
Normal file
8
src/meta/meta.cpp
Normal file
@@ -0,0 +1,8 @@
|
||||
#include "common/app/TwoPhaseApplication.h"
|
||||
#include "memory/common/OverrideCppNewDelete.h"
|
||||
#include "meta/service/MetaServer.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
using namespace hf3fs;
|
||||
return TwoPhaseApplication<meta::server::MetaServer>().run(argc, argv);
|
||||
}
|
||||
439
src/meta/service/MetaOperator.cc
Normal file
439
src/meta/service/MetaOperator.cc
Normal file
@@ -0,0 +1,439 @@
|
||||
#include "meta/service/MetaOperator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <fcntl.h>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/Conv.h>
|
||||
#include <folly/Expected.h>
|
||||
#include <folly/Overload.h>
|
||||
#include <folly/Random.h>
|
||||
#include <folly/ScopeGuard.h>
|
||||
#include <folly/experimental/coro/BlockingWait.h>
|
||||
#include <folly/experimental/coro/Invoke.h>
|
||||
#include <folly/experimental/coro/Sleep.h>
|
||||
#include <folly/functional/Invoke.h>
|
||||
#include <folly/functional/Partial.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include <unistd.h>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/app/NodeId.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/WithTransaction.h"
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/serde/ClientContext.h"
|
||||
#include "common/utils/BackgroundRunner.h"
|
||||
#include "common/utils/CPUExecutorGroup.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/CoroutinesPool.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/FaultInjection.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/RobinHood.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "core/user/UserToken.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "fbs/meta/Utils.h"
|
||||
#include "fdb/FDBRetryStrategy.h"
|
||||
#include "meta/components/ChainAllocator.h"
|
||||
#include "meta/components/Distributor.h"
|
||||
#include "meta/components/FileHelper.h"
|
||||
#include "meta/components/Forward.h"
|
||||
#include "meta/components/InodeIdAllocator.h"
|
||||
#include "meta/components/SessionManager.h"
|
||||
#include "meta/store/Idempotent.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
#include "meta/store/PathResolve.h"
|
||||
#include "meta/store/Utils.h"
|
||||
#include "meta/store/ops/BatchOperation.h"
|
||||
|
||||
// Coroutine-only helper: when config_.authenticate() is enabled, authenticates `user` and
// co_returns the error from the enclosing coroutine if that fails. Expands to a single statement.
#define AUTHENTICATE(user)                             \
  do {                                                 \
    if (config_.authenticate()) {                      \
      CO_RETURN_ON_ERROR(co_await authenticate(user)); \
    }                                                  \
  } while (false)
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
template <typename Func, typename Arg>
|
||||
auto MetaOperator::runOp(Func &&func, Arg &&arg)
|
||||
-> CoTryTask<typename std::invoke_result_t<Func, MetaStore, Arg &&>::element_type::RspT> {
|
||||
#ifndef NDEBUG
|
||||
auto fi = FaultInjection::clone();
|
||||
#endif
|
||||
auto deadline = std::optional<SteadyTime>();
|
||||
if constexpr (std::is_base_of_v<ReqBase, std::remove_reference_t<Arg>>) {
|
||||
CO_RETURN_ON_ERROR(arg.valid());
|
||||
if (config_.operation_timeout() != 0_s) {
|
||||
deadline = SteadyClock::now() + config_.operation_timeout();
|
||||
}
|
||||
}
|
||||
auto txn = kvEngine_->createReadWriteTransaction();
|
||||
auto op = ((*metaStore_).*func)(std::forward<Arg>(arg));
|
||||
auto driver = OperationDriver(*op, arg, deadline);
|
||||
co_return co_await driver.run(std::move(txn), createRetryConfig(), config_.readonly(), config_.grv_cache());
|
||||
}
|
||||
|
||||
// Executes a batched operation for `inodeId` in its own read-write transaction, then updates the
// per-inode batch bookkeeping: the next waiting batch is woken up, or the map entry is erased
// when wakeupNext() reports there is none.
// A successful result whose inode id differs from `inodeId` is logged at FATAL level.
CoTryTask<Inode> MetaOperator::runBatch(InodeId inodeId,
                                        std::unique_ptr<BatchedOp> op,
                                        std::optional<SteadyTime> deadline) {
#ifndef NDEBUG
  auto fi = FaultInjection::clone();
#endif
  assert(op);

  auto txn = kvEngine_->createReadWriteTransaction();
  auto opDriver = OperationDriver(*op, Void{}, deadline);
  auto result = co_await opDriver.run(std::move(txn), createRetryConfig(), config_.readonly(), config_.grv_cache());
  if (!result.hasError()) {
    XLOGF_IF(FATAL, inodeId != result->id, "expected {}, get {}", inodeId, result->id);
  }

  const auto releaseBatchSlot = [&](auto &map) {
    auto entry = map.find(op->inodeId_);
    XLOGF_IF(FATAL, entry == map.end(), "shouldn't happen");
    if (!entry->second.wakeupNext()) {
      map.erase(entry);
    }
  };
  batches_.withLock(releaseBatchSlot, op->inodeId_);

  co_return result;
}
|
||||
|
||||
template <typename Req, typename Rsp>
|
||||
CoTryTask<Rsp> MetaOperator::runInBatch(InodeId inodeId, Req req) {
|
||||
CO_RETURN_ON_ERROR(req.valid());
|
||||
auto deadline = std::optional<SteadyTime>();
|
||||
if (config_.operation_timeout() != 0_s) {
|
||||
deadline = SteadyClock::now() + config_.operation_timeout();
|
||||
}
|
||||
OperationRecorder::Guard guard(OperationRecorder::server(), MetaSerde<>::getRpcName(req), req.user.uid);
|
||||
BatchedOp::Waiter<Req, Rsp> waiter(std::move(req));
|
||||
auto op = addBatchReq(inodeId, waiter);
|
||||
co_await waiter.baton;
|
||||
if (op) {
|
||||
co_await runBatch(inodeId, std::move(op), deadline);
|
||||
}
|
||||
auto result = waiter.getResult();
|
||||
guard.finish(result);
|
||||
co_return result;
|
||||
}
|
||||
|
||||
// Wires up all components of the meta operator (distributor, user store, allocators, file helper,
// session manager, GC manager, meta store) from the given config and clients. Nothing is started
// here; see init() / start().
MetaOperator::MetaOperator(const Config &cfg,
                           flat::NodeId nodeId,
                           std::shared_ptr<kv::IKVEngine> kvEngine,
                           std::shared_ptr<client::ICommonMgmtdClient> mgmtdClient,
                           std::shared_ptr<storage::client::StorageClient> storageClient,
                           std::unique_ptr<Forward> forward)
    : config_(cfg),
      nodeId_(nodeId),
      metaEventTraceLog_(config_.event_trace_log()),
      kvEngine_(kvEngine),
      mgmtd_(mgmtdClient),
      distributor_(std::make_shared<Distributor>(cfg.distributor(), nodeId, kvEngine)),
      userStore_(std::make_shared<core::UserStoreEx>(*kvEngine_, config_.retry_transaction(), config_.user_cache())),
      inodeIdAlloc_(InodeIdAllocator::create(kvEngine)),
      chainAlloc_(std::make_shared<ChainAllocator>(mgmtdClient)),
      fileHelper_(std::make_shared<FileHelper>(cfg, mgmtdClient, storageClient)),
      sessionManager_(
          std::make_shared<SessionManager>(cfg.session_manager(), nodeId, kvEngine_, mgmtdClient, fileHelper_)),
      gcManager_(std::make_shared<GcManager>(cfg,
                                             nodeId,
                                             metaEventTraceLog_,
                                             kvEngine_,
                                             mgmtdClient,
                                             inodeIdAlloc_,
                                             fileHelper_,
                                             sessionManager_,
                                             userStore_)),
      forward_(std::move(forward)),
      metaStore_(std::make_unique<MetaStore>(cfg,
                                             metaEventTraceLog_,
                                             distributor_,
                                             inodeIdAlloc_,
                                             chainAlloc_,
                                             fileHelper_,
                                             sessionManager_,
                                             gcManager_)) {
  // Let the session manager close files through this operator; the close response is discarded
  // and only success / failure is passed on.
  sessionManager_->setCloseFunc([this](const auto &req) -> CoTryTask<void> {
    co_return (co_await close(req)).then([](auto &) { return Void{}; });
  });
}
|
||||
|
||||
// One-time initialisation.
//   1. If `layout` is given, runs MetaStore::initFs with it.
//   2. Opens the event trace log; failure is reported as StatusCode::kIOError.
//   3. Initialises the GC manager.
// Returns the first error encountered, otherwise success.
CoTryTask<void> MetaOperator::init(std::optional<Layout> layout) {
  XLOGF(INFO, "MetaOperator::init");

  if (layout.has_value()) {
    CO_RETURN_ON_ERROR(co_await runOp(&MetaStore::initFs, *layout));
  }

  const bool traceLogOpened = metaEventTraceLog_.open();
  if (!traceLogOpened) {
    XLOGF(CRITICAL, "Failed to open trace log in directory: {}", config_.event_trace_log().trace_file_dir());
    co_return makeError(StatusCode::kIOError);
  }

  CO_RETURN_ON_ERROR(co_await gcManager_->init());

  XLOGF(INFO, "MetaOperator::init success.");
  co_return Void{};
}
|
||||
|
||||
// Starts the distributor, file helper, GC manager and session manager on `exec`, then registers
// the periodic "idempotent_clean" background job on one executor picked from the group.
void MetaOperator::start(CPUExecutorGroup &exec) {
  XLOGF(INFO, "MetaOperator::start");

  distributor_->start(exec);
  fileHelper_->start(exec);
  gcManager_->start(exec);
  sessionManager_->start(exec);

  bgRunner_ = std::make_unique<BackgroundRunner>(&exec.randomPick());

  bgRunner_->start(
      "idempotent_clean",
      [this]() -> CoTask<void> {
        // Only the node for which isFirstMeta() holds performs the cleanup.
        if (!isFirstMeta(*mgmtd_, nodeId_)) co_return;

        // Walk the idempotent records batch by batch (2048 per transaction), resuming each batch
        // from the cursor returned by the previous one, until done, stopped, or a batch fails.
        std::optional<std::string> cursor;
        size_t total = 0;
        size_t cleaned = 0;
        for (bool more = true; more && !stop_;) {
          size_t batchTotal = 0;
          size_t batchCleaned = 0;
          auto strategy = kv::FDBRetryStrategy(createRetryConfig());
          auto txn = kvEngine_->createReadWriteTransaction();
          auto result = co_await kv::WithTransaction(strategy).run(
              std::move(txn),
              [&](kv::IReadWriteTransaction &rwTxn) -> CoTryTask<std::pair<std::string, bool>> {
                co_return co_await Idempotent::clean(rwTxn,
                                                     cursor,
                                                     config_.idempotent_record_expire(),
                                                     2048,
                                                     batchTotal,
                                                     batchCleaned);
              });
          if (!result) {
            XLOGF(ERR, "Clean idempotent record failed, {}", result.error());
            break;
          }
          total += batchTotal;
          cleaned += batchCleaned;
          cursor = result->first;
          more = result->second;
        }
        XLOGF(INFO, "Clean idempotent record, total {}, cleaned {}", total, cleaned);
        co_return;
      },
      config_.idempotent_record_clean_getter());
}
|
||||
|
||||
void MetaOperator::beforeStop() {
|
||||
XLOGF(INFO, "MetaOperator::beforeStop");
|
||||
stop_ = true;
|
||||
if (distributor_) {
|
||||
distributor_->stopAndJoin(true);
|
||||
}
|
||||
XLOGF(INFO, "MetaOperator::beforeStop finished");
|
||||
}
|
||||
|
||||
void MetaOperator::afterStop() {
|
||||
XLOGF(INFO, "MetaOperator::afterStop");
|
||||
if (bgRunner_) {
|
||||
folly::coro::blockingWait(bgRunner_->stopAll());
|
||||
bgRunner_.reset();
|
||||
}
|
||||
if (gcManager_) {
|
||||
gcManager_->stopAndJoin();
|
||||
}
|
||||
if (sessionManager_) {
|
||||
sessionManager_->stopAndJoin();
|
||||
}
|
||||
if (fileHelper_) {
|
||||
fileHelper_->stopAndJoin();
|
||||
}
|
||||
metaEventTraceLog_.close();
|
||||
XLOGF(INFO, "MetaOperator::afterStop finished");
|
||||
}
|
||||
|
||||
// Builds the FDB retry configuration from the current retry_transaction settings
// (max backoff and max retry count); the third field is always set to true.
kv::FDBRetryStrategy::Config MetaOperator::createRetryConfig() const {
  const auto &retry = config_.retry_transaction();
  return kv::FDBRetryStrategy::Config{retry.max_backoff(), retry.max_retry_count(), true};
}
|
||||
|
||||
// Authenticates `userInfo` against the user store.
// Every exit other than success - an error result or an exception - adds one sample to the
// "meta_server.auth_failed" counter, tagged with the uid.
CoTryTask<void> MetaOperator::authenticate(UserInfo &userInfo) {
  static monitor::CountRecorder failed("meta_server.auth_failed");

  // Armed until authentication succeeds.
  auto recordFailure = folly::makeGuard(
      [&]() { failed.addSample(1, {{"uid", folly::to<std::string>(userInfo.uid.toUnderType())}}); });

  auto authResult = co_await userStore_->authenticate(userInfo);
  CO_RETURN_ON_ERROR(authResult);

  recordFailure.dismiss();
  co_return Void{};
}
|
||||
|
||||
// RPC: authenticates the caller (when authentication is enabled) and hands the user info from
// the request back in the response.
CoTryTask<AuthRsp> MetaOperator::authenticate(AuthReq req) {
  AUTHENTICATE(req.user);

  co_return AuthRsp(std::move(req.user));
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::statFs in a transaction.
CoTryTask<StatFsRsp> MetaOperator::statFs(StatFsReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::statFs, req);
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::stat in a transaction.
CoTryTask<StatRsp> MetaOperator::stat(StatReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::stat, req);
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::batchStat in a transaction.
CoTryTask<BatchStatRsp> MetaOperator::batchStat(BatchStatReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::batchStat, req);
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::batchStatByPath in a transaction.
CoTryTask<BatchStatByPathRsp> MetaOperator::batchStatByPath(BatchStatByPathReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::batchStatByPath, req);
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::getRealPath in a transaction.
CoTryTask<GetRealPathRsp> MetaOperator::getRealPath(GetRealPathReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::getRealPath, req);
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::open in a transaction.
CoTryTask<OpenRsp> MetaOperator::open(OpenReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::open, req);
}
|
||||
|
||||
// RPC: truncate is no longer supported. Logs the calling client's hostname at CRITICAL level and
// always fails with StatusCode::kNotImplemented so that outdated clients get upgraded.
CoTryTask<TruncateRsp> MetaOperator::truncate(TruncateReq req) {
  XLOGF(CRITICAL, "truncate is deprecated, update client {}", req.client.hostname);
  co_return makeError(StatusCode::kNotImplemented, "truncate is deprecated, update client");
}
|
||||
|
||||
// RPC: syncs an inode. The request is forwarded to the server the distributor assigns to the
// inode; if that is this node, it is executed locally as part of the inode's batch.
CoTryTask<SyncRsp> MetaOperator::sync(SyncReq req) {
  // NOTE: don't auth user for sync
  const auto node = distributor_->getServer(req.inode);
  if (node != distributor_->nodeId()) {
    co_return co_await forward_->forward<SyncReq, SyncRsp>(node, std::move(req));
  }

  // Copy the id first: `req` is moved into the call below.
  auto inodeId = req.inode;
  co_return co_await runInBatch<SyncReq, SyncRsp>(inodeId, std::move(req));
}
|
||||
|
||||
// RPC: closes an inode. The request is forwarded to the server the distributor assigns to the
// inode; if that is this node, it is executed locally as part of the inode's batch.
CoTryTask<CloseRsp> MetaOperator::close(CloseReq req) {
  // Note: don't auth user here
  const auto node = distributor_->getServer(req.inode);
  if (node != distributor_->nodeId()) {
    co_return co_await forward_->forward<CloseReq, CloseRsp>(node, std::move(req));
  }

  // Copy the id first: `req` is moved into the call below.
  auto inodeId = req.inode;
  co_return co_await runInBatch<CloseReq, CloseRsp>(inodeId, std::move(req));
}
|
||||
|
||||
// RPC: creates (or opens) a file.
// When the request path still has a parent path, MetaStore::tryOpen is run first and its result
// is returned if it succeeded or if the path still has a parent path afterwards. Otherwise the
// create is routed by the parent inode: executed locally in the parent's batch when this node is
// responsible for it, forwarded to the responsible node if not.
CoTryTask<CreateRsp> MetaOperator::create(CreateReq req) {
  AUTHENTICATE(req.user);
  CO_RETURN_ON_ERROR(req.valid());

  XLOGF(DBG, "create {}", req);

  if (req.path.path->has_parent_path()) {
    // try open first.
    // NOTE(review): `req` is handed to tryOpen by reference and re-checked afterwards, so tryOpen
    // presumably rewrites req.path to (parent inode, file name) when only the last component is
    // missing - confirm against MetaStore::tryOpen.
    auto opened = co_await runOp(&MetaStore::tryOpen, req);
    const bool done = opened.hasValue() || req.path.path->has_parent_path();
    if (done) {
      co_return opened;
    }
    if (!req.valid()) {
      auto msg = fmt::format("req {} not valid after try open", req);
      XLOG(DFATAL, msg);
      co_return makeError(MetaCode::kFoundBug, std::move(msg));
    }

    XLOGF(DBG, "create {}", req);
  }

  const auto node = distributor_->getServer(req.path.parent);
  if (node != distributor_->nodeId()) {
    co_return co_await forward_->forward<CreateReq, CreateRsp>(node, std::move(req));
  }

  // Copy the id first: `req` is moved into the call below.
  auto parentId = req.path.parent;
  co_return co_await runInBatch<CreateReq, CreateRsp>(parentId, std::move(req));
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::mkdirs in a transaction.
CoTryTask<MkdirsRsp> MetaOperator::mkdirs(MkdirsReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::mkdirs, req);
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::symlink in a transaction.
CoTryTask<SymlinkRsp> MetaOperator::symlink(SymlinkReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::symlink, req);
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::remove in a transaction.
CoTryTask<RemoveRsp> MetaOperator::remove(RemoveReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::remove, req);
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::rename in a transaction.
CoTryTask<RenameRsp> MetaOperator::rename(RenameReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::rename, req);
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::list in a transaction.
CoTryTask<ListRsp> MetaOperator::list(ListReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::list, req);
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::hardLink in a transaction.
CoTryTask<HardLinkRsp> MetaOperator::hardLink(HardLinkReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::hardLink, req);
}
|
||||
|
||||
// RPC: changes attributes.
// When the request carries a path it is executed directly through MetaStore::setAttr. Otherwise
// it is routed by req.path.parent: executed locally in that inode's batch when this node is
// responsible for it, forwarded to the responsible node if not.
CoTryTask<SetAttrRsp> MetaOperator::setAttr(SetAttrReq req) {
  AUTHENTICATE(req.user);

  if (req.path.path) {
    co_return co_await runOp(&MetaStore::setAttr, req);
  }

  const auto node = distributor_->getServer(req.path.parent);
  if (node != distributor_->nodeId()) {
    co_return co_await forward_->forward<SetAttrReq, SetAttrRsp>(node, std::move(req));
  }

  // Copy the id first: `req` is moved into the call below.
  auto parentId = req.path.parent;
  co_return co_await runInBatch<SetAttrReq, SetAttrRsp>(parentId, std::move(req));
}
|
||||
|
||||
// RPC: authenticates the caller, then runs MetaStore::lockDirectory in a transaction.
CoTryTask<LockDirectoryRsp> MetaOperator::lockDirectory(LockDirectoryReq req) {
  AUTHENTICATE(req.user);

  co_return co_await runOp(&MetaStore::lockDirectory, req);
}
|
||||
|
||||
// RPC: runs MetaStore::pruneSession in a transaction. No AUTHENTICATE step is performed here.
CoTryTask<PruneSessionRsp> MetaOperator::pruneSession(PruneSessionReq req) {
  co_return co_await runOp(&MetaStore::pruneSession, req);
}
|
||||
|
||||
// RPC: drops cached user records. With `dropAll` the whole cache is cleared; otherwise, if a uid
// is given, only that user's entry is cleared. Always succeeds. No AUTHENTICATE step is
// performed here.
CoTryTask<DropUserCacheRsp> MetaOperator::dropUserCache(DropUserCacheReq req) {
  auto &cache = userStore_->cache();
  if (req.dropAll) {
    cache.clear();
  } else if (req.uid) {
    cache.clear(*req.uid);
  }

  co_return DropUserCacheRsp{};
}
|
||||
|
||||
// RPC: runs MetaStore::testRpc in a transaction.
CoTryTask<TestRpcRsp> MetaOperator::testRpc(TestRpcReq req) {
  // don't need auth user
  co_return co_await runOp(&MetaStore::testRpc, req);
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
212
src/meta/service/MetaOperator.h
Normal file
212
src/meta/service/MetaOperator.h
Normal file
@@ -0,0 +1,212 @@
|
||||
#pragma once
|
||||
|
||||
#include <arrow/util/macros.h>
|
||||
#include <atomic>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/Utility.h>
|
||||
#include <folly/executors/CPUThreadPoolExecutor.h>
|
||||
#include <folly/experimental/coro/Baton.h>
|
||||
#include <folly/functional/Invoke.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
#include "client/mgmtd/ICommonMgmtdClient.h"
|
||||
#include "client/mgmtd/IMgmtdClientForServer.h"
|
||||
#include "client/storage/StorageClient.h"
|
||||
#include "common/kv/IKVEngine.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/WithTransaction.h"
|
||||
#include "common/utils/BackgroundRunner.h"
|
||||
#include "common/utils/CPUExecutorGroup.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/CoroutinesPool.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "core/user/UserStoreEx.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "fdb/FDBRetryStrategy.h"
|
||||
#include "meta/base/Config.h"
|
||||
#include "meta/components/ChainAllocator.h"
|
||||
#include "meta/components/Distributor.h"
|
||||
#include "meta/components/FileHelper.h"
|
||||
#include "meta/components/Forward.h"
|
||||
#include "meta/components/GcManager.h"
|
||||
#include "meta/components/SessionManager.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/ops/BatchOperation.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class BatchedOp;
|
||||
|
||||
class MetaOperator : public folly::NonCopyableNonMovable {
|
||||
public:
|
||||
MetaOperator(const Config &cfg,
|
||||
flat::NodeId nodeId,
|
||||
std::shared_ptr<kv::IKVEngine> kvEngine,
|
||||
std::shared_ptr<client::ICommonMgmtdClient> mgmtdClient,
|
||||
std::shared_ptr<storage::client::StorageClient> storageClient,
|
||||
std::unique_ptr<Forward> forward);
|
||||
|
||||
CoTryTask<void> init(std::optional<Layout> rootLayout);
|
||||
|
||||
void start(CPUExecutorGroup &exec);
|
||||
void beforeStop();
|
||||
void afterStop();
|
||||
|
||||
CoTryTask<AuthRsp> authenticate(AuthReq req);
|
||||
|
||||
CoTryTask<StatFsRsp> statFs(StatFsReq req);
|
||||
|
||||
CoTryTask<StatRsp> stat(StatReq req);
|
||||
|
||||
CoTryTask<GetRealPathRsp> getRealPath(GetRealPathReq req);
|
||||
|
||||
CoTryTask<OpenRsp> open(OpenReq req);
|
||||
|
||||
CoTryTask<CloseRsp> close(CloseReq req);
|
||||
|
||||
CoTryTask<CreateRsp> create(CreateReq req);
|
||||
|
||||
CoTryTask<MkdirsRsp> mkdirs(MkdirsReq req);
|
||||
|
||||
CoTryTask<SymlinkRsp> symlink(SymlinkReq req);
|
||||
|
||||
CoTryTask<RemoveRsp> remove(RemoveReq req);
|
||||
|
||||
CoTryTask<RenameRsp> rename(RenameReq req);
|
||||
|
||||
CoTryTask<ListRsp> list(ListReq req);
|
||||
|
||||
CoTryTask<TruncateRsp> truncate(TruncateReq req);
|
||||
|
||||
CoTryTask<SyncRsp> sync(SyncReq req);
|
||||
|
||||
CoTryTask<HardLinkRsp> hardLink(HardLinkReq req);
|
||||
|
||||
CoTryTask<SetAttrRsp> setAttr(SetAttrReq req);
|
||||
|
||||
CoTryTask<PruneSessionRsp> pruneSession(PruneSessionReq req);
|
||||
|
||||
CoTryTask<DropUserCacheRsp> dropUserCache(DropUserCacheReq req);
|
||||
|
||||
CoTryTask<LockDirectoryRsp> lockDirectory(LockDirectoryReq req);
|
||||
|
||||
CoTryTask<TestRpcRsp> testRpc(TestRpcReq req);
|
||||
|
||||
CoTryTask<BatchStatRsp> batchStat(BatchStatReq req);
|
||||
|
||||
CoTryTask<BatchStatByPathRsp> batchStatByPath(BatchStatByPathReq req);
|
||||
|
||||
private:
|
||||
friend class MockMeta;
|
||||
|
||||
template <typename>
|
||||
FRIEND_TEST(TestBatchOp, batch);
|
||||
|
||||
template <typename>
|
||||
FRIEND_TEST(TestCreate, batch);
|
||||
|
||||
class Batch {
|
||||
public:
|
||||
void setNext(BatchedOp *op, folly::coro::Baton *baton) {
|
||||
next = op;
|
||||
nextBaton = baton;
|
||||
}
|
||||
|
||||
bool wakeupNext() {
|
||||
if (!next) {
|
||||
return false;
|
||||
}
|
||||
nextBaton->post();
|
||||
next = nullptr;
|
||||
nextBaton = nullptr;
|
||||
return true;
|
||||
}
|
||||
|
||||
BatchedOp *getNext() const { return next; }
|
||||
|
||||
private:
|
||||
BatchedOp *next = nullptr;
|
||||
folly::coro::Baton *nextBaton = nullptr;
|
||||
};
|
||||
|
||||
kv::FDBRetryStrategy::Config createRetryConfig() const;
|
||||
kv::FDBRetryStrategy createRetryStrategy() const { return kv::FDBRetryStrategy(createRetryConfig()); }
|
||||
|
||||
template <typename Func, typename Arg>
|
||||
auto runOp(Func &&func, Arg &&arg)
|
||||
-> CoTryTask<typename std::invoke_result_t<Func, MetaStore, Arg &&>::element_type::RspT>;
|
||||
|
||||
template <typename Req, typename Rsp>
|
||||
std::unique_ptr<BatchedOp> addBatchReq(InodeId inodeId, BatchedOp::Waiter<Req, Rsp> &waiter) {
|
||||
auto func = [&](auto &map) {
|
||||
auto [iter, inserted] = map.try_emplace(inodeId);
|
||||
auto &batch = iter->second;
|
||||
if (inserted) {
|
||||
assert(!batch.getNext());
|
||||
auto op = std::make_unique<BatchedOp>(*metaStore_, inodeId);
|
||||
op->add(waiter);
|
||||
waiter.baton.post();
|
||||
return op;
|
||||
} else if (!batch.getNext()) {
|
||||
auto op = std::make_unique<BatchedOp>(*metaStore_, inodeId);
|
||||
op->add(waiter);
|
||||
batch.setNext(op.get(), &waiter.baton);
|
||||
return op;
|
||||
} else {
|
||||
auto next = batch.getNext();
|
||||
auto num_reqs = next->numReqs();
|
||||
if (UNLIKELY(config_.max_batch_operations() != 0 && num_reqs >= config_.max_batch_operations())) {
|
||||
auto msg = fmt::format("too many batch operations on {}", inodeId);
|
||||
XLOG(WARN, msg);
|
||||
waiter.result = makeError(MetaCode::kBusy, std::move(msg));
|
||||
waiter.baton.post();
|
||||
} else {
|
||||
if (num_reqs && num_reqs % 1024 == 0) {
|
||||
XLOGF(WARN, "{} batch operations on {}", num_reqs, inodeId);
|
||||
}
|
||||
next->add(waiter);
|
||||
}
|
||||
return std::unique_ptr<BatchedOp>();
|
||||
}
|
||||
};
|
||||
return batches_.withLock(func, inodeId);
|
||||
}
|
||||
|
||||
CoTryTask<Inode> runBatch(InodeId inodeId,
|
||||
std::unique_ptr<BatchedOp> op,
|
||||
std::optional<SteadyTime> deadline = std::nullopt);
|
||||
|
||||
template <typename Req, typename Rsp>
|
||||
CoTryTask<Rsp> runInBatch(InodeId inodeId, Req req);
|
||||
|
||||
CoTryTask<void> authenticate(UserInfo &userInfo);
|
||||
|
||||
const Config &config_;
|
||||
flat::NodeId nodeId_;
|
||||
analytics::StructuredTraceLog<MetaEventTrace> metaEventTraceLog_;
|
||||
std::shared_ptr<kv::IKVEngine> kvEngine_;
|
||||
std::shared_ptr<client::ICommonMgmtdClient> mgmtd_;
|
||||
std::shared_ptr<Distributor> distributor_;
|
||||
std::shared_ptr<core::UserStoreEx> userStore_;
|
||||
std::shared_ptr<InodeIdAllocator> inodeIdAlloc_;
|
||||
std::shared_ptr<ChainAllocator> chainAlloc_;
|
||||
std::shared_ptr<FileHelper> fileHelper_;
|
||||
std::shared_ptr<SessionManager> sessionManager_;
|
||||
std::shared_ptr<GcManager> gcManager_;
|
||||
std::unique_ptr<Forward> forward_;
|
||||
|
||||
std::unique_ptr<MetaStore> metaStore_;
|
||||
|
||||
Shards<std::map<InodeId, Batch>, 63> batches_;
|
||||
|
||||
std::atomic_bool stop_{false};
|
||||
std::unique_ptr<BackgroundRunner> bgRunner_;
|
||||
};
|
||||
} // namespace hf3fs::meta::server
|
||||
45
src/meta/service/MetaSerdeService.h
Normal file
45
src/meta/service/MetaSerdeService.h
Normal file
@@ -0,0 +1,45 @@
|
||||
#pragma once
|
||||
|
||||
#include "common/serde/CallContext.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "meta/service/MetaOperator.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/**
 * Serde-facing adapter of MetaOperator.
 *
 * Every RPC method is generated by META_SERVICE_METHOD and simply forwards the request to the
 * method of the same name on the wrapped MetaOperator; the call context is ignored.
 * The operator is held by reference and must outlive the service.
 */
class MetaSerdeService : public serde::ServiceWrapper<MetaSerdeService, MetaSerde> {
 public:
  // explicit: a MetaOperator must never convert to a service implicitly.
  explicit MetaSerdeService(MetaOperator &meta)
      : meta_(meta) {}

#define META_SERVICE_METHOD(NAME, REQ, RESP) \
  CoTryTask<RESP> NAME(serde::CallContext &, const REQ &req) { return meta_.NAME(req); }

  META_SERVICE_METHOD(statFs, StatFsReq, StatFsRsp);
  META_SERVICE_METHOD(stat, StatReq, StatRsp);
  META_SERVICE_METHOD(create, CreateReq, CreateRsp);
  META_SERVICE_METHOD(mkdirs, MkdirsReq, MkdirsRsp);
  META_SERVICE_METHOD(symlink, SymlinkReq, SymlinkRsp);
  META_SERVICE_METHOD(hardLink, HardLinkReq, HardLinkRsp);
  META_SERVICE_METHOD(remove, RemoveReq, RemoveRsp);
  META_SERVICE_METHOD(open, OpenReq, OpenRsp);
  META_SERVICE_METHOD(sync, SyncReq, SyncRsp);
  META_SERVICE_METHOD(close, CloseReq, CloseRsp);
  META_SERVICE_METHOD(rename, RenameReq, RenameRsp);
  META_SERVICE_METHOD(list, ListReq, ListRsp);
  META_SERVICE_METHOD(truncate, TruncateReq, TruncateRsp);
  META_SERVICE_METHOD(getRealPath, GetRealPathReq, GetRealPathRsp);
  META_SERVICE_METHOD(setAttr, SetAttrReq, SetAttrRsp);
  META_SERVICE_METHOD(pruneSession, PruneSessionReq, PruneSessionRsp);
  META_SERVICE_METHOD(dropUserCache, DropUserCacheReq, DropUserCacheRsp);
  META_SERVICE_METHOD(authenticate, AuthReq, AuthRsp);
  META_SERVICE_METHOD(lockDirectory, LockDirectoryReq, LockDirectoryRsp);
  META_SERVICE_METHOD(testRpc, TestRpcReq, TestRpcRsp);
  META_SERVICE_METHOD(batchStat, BatchStatReq, BatchStatRsp);
  META_SERVICE_METHOD(batchStatByPath, BatchStatByPathReq, BatchStatByPathRsp);
#undef META_SERVICE_METHOD

 private:
  MetaOperator &meta_;
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
111
src/meta/service/MetaServer.cc
Normal file
111
src/meta/service/MetaServer.cc
Normal file
@@ -0,0 +1,111 @@
|
||||
#include "meta/service/MetaServer.h"
|
||||
|
||||
#include <folly/experimental/coro/BlockingWait.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
#include "common/app/ApplicationBase.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "core/service/CoreService.h"
|
||||
#include "fdb/HybridKvEngine.h"
|
||||
#include "meta/components/ChainAllocator.h"
|
||||
#include "meta/service/MetaOperator.h"
|
||||
#include "meta/service/MetaSerdeService.h"
|
||||
#include "stubs/common/RealStubFactory.h"
|
||||
#include "stubs/mgmtd/MgmtdServiceStub.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
// Builds the net::Server base from config.base() and keeps a reference to the full config;
// everything else is created later in beforeStart().
MetaServer::MetaServer(const MetaServer::Config &config)
|
||||
: net::Server(config.base()),
|
||||
config_(config) {}
|
||||
|
||||
// Only logs; members are released by their own destructors.
MetaServer::~MetaServer() { XLOGF(INFO, "Destructor MetaServer"); }
|
||||
|
||||
/**
 * Set up everything the meta server needs before it starts serving.
 *
 * Steps, in order:
 *  1. create and start the background net client unless one was injected;
 *  2. create the mgmtd client unless one was injected, start it and fetch routing info
 *     (a failed refresh is FATAL);
 *  3. create the KV engine unless one was injected;
 *  4. create the storage client (failure is FATAL) and the MetaOperator, register the serde services;
 *  5. initialise the MetaOperator (an error is returned to the caller) and start it.
 */
Result<Void> MetaServer::beforeStart() {
  if (backgroundClient_ == nullptr) {
    backgroundClient_ = std::make_unique<net::Client>(config_.background_client());
    RETURN_ON_ERROR(backgroundClient_->start());
  }

  if (mgmtdClient_ == nullptr) {
    auto makeSerdeCtx = [this](net::Address addr) { return backgroundClient_->serdeCtx(addr); };
    auto stubFactory = std::make_unique<stubs::RealStubFactory<mgmtd::MgmtdServiceStub>>(std::move(makeSerdeCtx));
    mgmtdClient_ = std::make_shared<::hf3fs::client::MgmtdClientForServer>(appInfo().clusterId,
                                                                           std::move(stubFactory),
                                                                           config_.mgmtd_client());
  }

  mgmtdClient_->setAppInfoForHeartbeat(appInfo());
  mgmtdClient_->setConfigListener(ApplicationBase::updateConfig);
  mgmtdClient_->updateHeartbeatPayload(flat::MetaHeartbeatInfo{});
  folly::coro::blockingWait(mgmtdClient_->start(&tpg().bgThreadPool().randomPick()));
  auto routingRefresh = folly::coro::blockingWait(mgmtdClient_->refreshRoutingInfo(/*force=*/false));
  XLOGF_IF(FATAL, !routingRefresh, "Failed to refresh initial routing info!");

  // init service groups.
  if (kvEngine_ == nullptr) {
    kvEngine_ = kv::HybridKvEngine::from(config_.kv_engine(), config_.use_memkv(), config_.fdb());
  }

  auto storageCli = storage::client::StorageClient::create(ClientId::random(appInfo().hostname),
                                                           config_.storage_client(),
                                                           *mgmtdClient_);
  XLOGF_IF(FATAL, !storageCli, "Failed to create storage client!");

  auto &info = this->appInfo();
  XLOGF_IF(FATAL, !info.nodeId, "Invalid nodeId {}", info.nodeId);
  metaOperator_ = std::make_unique<MetaOperator>(
      config_.meta(),
      info.nodeId,
      kvEngine_,
      mgmtdClient_,
      storageCli,
      std::make_unique<Forward>(config_.meta().forward(), info.nodeId, *backgroundClient_, mgmtdClient_));
  RETURN_ON_ERROR(addSerdeService(std::make_unique<MetaSerdeService>(*metaOperator_), true));
  RETURN_ON_ERROR(addSerdeService(std::make_unique<core::CoreService>()));

  // init MetaOperator.
  std::optional<Layout> rootLayout;
  if (config_.use_memkv()) {
    rootLayout = Layout::newEmpty(ChainTableId(1), 512 << 10, 1);
  }
  auto initRes = folly::coro::blockingWait(metaOperator_->init(rootLayout));
  if (UNLIKELY(!initRes)) {
    XLOGF(ERR, "Init MetaOperator failed with {}", initRes.error().describe());
    RETURN_ON_ERROR(initRes);
  }

  metaOperator_->start(tpg().bgThreadPool());

  return Void{};
}
|
||||
|
||||
/**
 * First tear-down phase: notify the MetaOperator, then stop the mgmtd client.
 *
 * metaOperator_ is only created near the end of beforeStart(), so it may still be null if start-up
 * bailed out early; it is guarded the same way mgmtdClient_ is.
 */
Result<Void> MetaServer::beforeStop() {
  if (metaOperator_) {
    metaOperator_->beforeStop();
  }
  if (mgmtdClient_) {
    folly::coro::blockingWait(mgmtdClient_->stop());
  }
  return Void{};
}
|
||||
|
||||
/**
 * Second tear-down phase: let the MetaOperator finish, then stop and join the background client.
 *
 * metaOperator_ is only created near the end of beforeStart(), so it may still be null if start-up
 * bailed out early; it is guarded the same way backgroundClient_ is.
 */
Result<Void> MetaServer::afterStop() {
  if (metaOperator_) {
    metaOperator_->afterStop();
  }
  if (backgroundClient_) {
    backgroundClient_->stopAndJoin();
  }
  return Void{};
}
|
||||
|
||||
// Start with a caller-supplied KV engine: beforeStart() only creates an engine when kvEngine_ is
// still empty, so the one stored here is used instead.
Result<Void> MetaServer::start(const flat::AppInfo &info, std::shared_ptr<kv::IKVEngine> kvEngine) {
|
||||
kvEngine_ = std::move(kvEngine);
|
||||
return net::Server::start(info);
|
||||
}
|
||||
|
||||
// Start with a caller-supplied background client and mgmtd client. The mgmtd client is wrapped in a
// MgmtdClientForServer; beforeStart() skips creating either object because both are already set.
Result<Void> MetaServer::start(const flat::AppInfo &info,
|
||||
std::unique_ptr<net::Client> client,
|
||||
std::shared_ptr<::hf3fs::client::MgmtdClient> mgmtdClient) {
|
||||
backgroundClient_ = std::move(client);
|
||||
mgmtdClient_ = std::make_shared<::hf3fs::client::MgmtdClientForServer>(std::move(mgmtdClient));
|
||||
return net::Server::start(info);
|
||||
}
|
||||
} // namespace hf3fs::meta::server
|
||||
96
src/meta/service/MetaServer.h
Normal file
96
src/meta/service/MetaServer.h
Normal file
@@ -0,0 +1,96 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "client/mgmtd/MgmtdClientForServer.h"
|
||||
#include "client/storage/StorageClient.h"
|
||||
#include "common/logging/LogConfig.h"
|
||||
#include "common/net/Client.h"
|
||||
#include "common/net/Server.h"
|
||||
#include "common/utils/BackgroundRunner.h"
|
||||
#include "common/utils/ConfigBase.h"
|
||||
#include "core/app/ServerAppConfig.h"
|
||||
#include "core/app/ServerLauncher.h"
|
||||
#include "core/app/ServerLauncherConfig.h"
|
||||
#include "core/app/ServerMgmtdClientFetcher.h"
|
||||
#include "fdb/HybridKvEngineConfig.h"
|
||||
#include "meta/base/Config.h"
|
||||
#include "meta/service/MetaOperator.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class MetaServer : public net::Server {
|
||||
public:
|
||||
static constexpr auto kName = "Meta";
|
||||
static constexpr auto kNodeType = flat::NodeType::META;
|
||||
|
||||
struct CommonConfig : public ApplicationBase::Config {
|
||||
CommonConfig() {
|
||||
using logging::LogConfig;
|
||||
log().set_categories({LogConfig::makeRootCategoryConfig(), LogConfig::makeEventCategoryConfig()});
|
||||
log().set_handlers({LogConfig::makeNormalHandlerConfig(),
|
||||
LogConfig::makeErrHandlerConfig(),
|
||||
LogConfig::makeFatalHandlerConfig(),
|
||||
LogConfig::makeEventHandlerConfig()});
|
||||
}
|
||||
};
|
||||
|
||||
using AppConfig = core::ServerAppConfig;
|
||||
struct LauncherConfig : public core::ServerLauncherConfig {
|
||||
LauncherConfig() { mgmtd_client() = hf3fs::client::MgmtdClientForServer::Config{}; }
|
||||
};
|
||||
using RemoteConfigFetcher = core::launcher::ServerMgmtdClientFetcher;
|
||||
using Launcher = core::ServerLauncher<MetaServer>;
|
||||
|
||||
struct Config : public ConfigBase<Config> {
|
||||
CONFIG_ITEM(use_memkv, false); // deprecated
|
||||
|
||||
CONFIG_OBJ(base, net::Server::Config, [](net::Server::Config &c) {
|
||||
c.set_groups_length(2);
|
||||
c.groups(0).listener().set_listen_port(8000);
|
||||
c.groups(0).set_services({"MetaSerde"});
|
||||
|
||||
c.groups(1).set_network_type(net::Address::TCP);
|
||||
c.groups(1).listener().set_listen_port(9000);
|
||||
c.groups(1).set_use_independent_thread_pool(true);
|
||||
c.groups(1).set_services({"Core"});
|
||||
});
|
||||
CONFIG_OBJ(fdb, kv::fdb::FDBConfig); // deprecated
|
||||
CONFIG_OBJ(meta, meta::server::Config);
|
||||
CONFIG_OBJ(background_client, net::Client::Config);
|
||||
CONFIG_OBJ(mgmtd_client, ::hf3fs::client::MgmtdClientForServer::Config);
|
||||
CONFIG_OBJ(storage_client, storage::client::StorageClient::Config, [](storage::client::StorageClient::Config &cfg) {
|
||||
cfg.retry().set_init_wait_time(2_s);
|
||||
cfg.retry().set_max_wait_time(5_s);
|
||||
cfg.retry().set_max_retry_time(5_s);
|
||||
cfg.retry().set_max_failures_before_failover(1);
|
||||
});
|
||||
CONFIG_OBJ(kv_engine, kv::HybridKvEngineConfig);
|
||||
};
|
||||
|
||||
MetaServer(const Config &config);
|
||||
~MetaServer() override;
|
||||
|
||||
using net::Server::start;
|
||||
Result<Void> start(const flat::AppInfo &info, std::shared_ptr<kv::IKVEngine> kvEngine);
|
||||
Result<Void> start(const flat::AppInfo &info,
|
||||
std::unique_ptr<net::Client> client,
|
||||
std::shared_ptr<::hf3fs::client::MgmtdClient> mgmtdClient);
|
||||
|
||||
// set up meta server.
|
||||
Result<Void> beforeStart() final;
|
||||
|
||||
// tear down meta server.
|
||||
Result<Void> beforeStop() final;
|
||||
Result<Void> afterStop() final;
|
||||
|
||||
private:
|
||||
const Config &config_;
|
||||
|
||||
std::shared_ptr<kv::IKVEngine> kvEngine_;
|
||||
std::unique_ptr<net::Client> backgroundClient_;
|
||||
std::shared_ptr<::hf3fs::client::MgmtdClientForServer> mgmtdClient_;
|
||||
std::unique_ptr<MetaOperator> metaOperator_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
108
src/meta/service/MockMeta.h
Normal file
108
src/meta/service/MockMeta.h
Normal file
@@ -0,0 +1,108 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/Random.h>
|
||||
#include <folly/Utility.h>
|
||||
#include <folly/executors/CPUThreadPoolExecutor.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "client/mgmtd/ICommonMgmtdClient.h"
|
||||
#include "client/storage/StorageClient.h"
|
||||
#include "common/app/NodeId.h"
|
||||
#include "common/kv/IKVEngine.h"
|
||||
#include "common/kv/mem/MemKVEngine.h"
|
||||
#include "common/serde/ClientMockContext.h"
|
||||
#include "common/utils/CPUExecutorGroup.h"
|
||||
#include "common/utils/ConfigBase.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "meta/components/ChainAllocator.h"
|
||||
#include "meta/components/FileHelper.h"
|
||||
#include "meta/components/GcManager.h"
|
||||
#include "meta/service/MetaOperator.h"
|
||||
#include "meta/service/MetaSerdeService.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class MockMeta : folly::NonCopyableNonMovable {
|
||||
public:
|
||||
static CoTryTask<std::unique_ptr<MockMeta>> create(const Config &cfg,
|
||||
std::shared_ptr<kv::IKVEngine> kv,
|
||||
std::shared_ptr<client::ICommonMgmtdClient> mgmtdClient) {
|
||||
auto meta = std::unique_ptr<MockMeta>(new MockMeta(cfg, kv, mgmtdClient));
|
||||
for (auto &moperator : meta->operators_) {
|
||||
CO_RETURN_ON_ERROR(co_await moperator->init(Layout::newEmpty(ChainTableId(1), 512 << 10, 128)));
|
||||
}
|
||||
co_return meta;
|
||||
}
|
||||
|
||||
~MockMeta() { stop(); }
|
||||
|
||||
void start(CPUExecutorGroup &exec) {
|
||||
for (auto &moperator : operators_) {
|
||||
moperator->start(exec);
|
||||
}
|
||||
}
|
||||
|
||||
void stop() {
|
||||
for (auto &moperator : operators_) {
|
||||
moperator->beforeStop();
|
||||
moperator->afterStop();
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<MetaSerdeService> getService() { return std::make_unique<MetaSerdeService>(*operators_.at(0)); }
|
||||
|
||||
MetaOperator &getOperator() { return *operators_.at(0); }
|
||||
|
||||
MetaStore &getStore() { return dynamic_cast<MetaStore &>(*getOperator().metaStore_); }
|
||||
|
||||
storage::client::StorageClient &getStorageClient() { return *storageClient_; }
|
||||
|
||||
FileHelper &getFileHelper() { return *getOperator().fileHelper_; }
|
||||
|
||||
GcManager &getGcManager() { return *getOperator().gcManager_; }
|
||||
|
||||
SessionManager &getSessionManager() { return *getOperator().sessionManager_; }
|
||||
|
||||
private:
|
||||
MockMeta(const Config &cfg,
|
||||
std::shared_ptr<kv::IKVEngine> kv,
|
||||
std::shared_ptr<client::ICommonMgmtdClient> mgmtdClient)
|
||||
: cfg_(cfg),
|
||||
mgmtdClient_(mgmtdClient) {
|
||||
storageClientCfg_.set_implementation_type(storage::client::StorageClient::ImplementationType::InMem);
|
||||
storageClient_ = storage::client::StorageClient::create(ClientId::random(), storageClientCfg_, *mgmtdClient_);
|
||||
|
||||
auto routing = mgmtdClient->getRoutingInfo();
|
||||
XLOGF_IF(FATAL, !routing, "routing info not available");
|
||||
auto nodes = routing->getNodeBy(flat::selectNodeByType(flat::NodeType::META) && flat::selectActiveNode());
|
||||
XLOGF_IF(FATAL, nodes.empty(), "no active metas");
|
||||
for (auto &node : nodes) {
|
||||
XLOGF_IF(FATAL, contexts_.contains(node.app.nodeId), "duplicated {}", node.app.nodeId);
|
||||
auto moperator = std::make_unique<MetaOperator>(
|
||||
cfg,
|
||||
node.app.nodeId,
|
||||
kv,
|
||||
mgmtdClient_,
|
||||
storageClient_,
|
||||
std::make_unique<Forward>(cfg.forward(), node.app.nodeId, contexts_, mgmtdClient_));
|
||||
contexts_[node.app.nodeId] = serde::ClientMockContext::create(std::make_unique<MetaSerdeService>(*moperator));
|
||||
operators_.push_back(std::move(moperator));
|
||||
}
|
||||
}
|
||||
|
||||
[[maybe_unused]] const Config &cfg_;
|
||||
storage::client::StorageClient::Config storageClientCfg_;
|
||||
std::vector<std::unique_ptr<MetaOperator>> operators_;
|
||||
std::map<flat::NodeId, serde::ClientMockContext> contexts_;
|
||||
std::shared_ptr<client::ICommonMgmtdClient> mgmtdClient_;
|
||||
std::shared_ptr<storage::client::StorageClient> storageClient_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
112
src/meta/store/BatchContext.h
Normal file
112
src/meta/store/BatchContext.h
Normal file
@@ -0,0 +1,112 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/Synchronized.h>
|
||||
#include <folly/Utility.h>
|
||||
#include <folly/experimental/coro/Baton.h>
|
||||
#include <folly/futures/Future.h>
|
||||
#include <folly/futures/Promise.h>
|
||||
#include <folly/io/async/Request.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Inode.h"
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class BatchContext : public folly::RequestData {
|
||||
public:
|
||||
template <typename T>
|
||||
struct SharedFuture : folly::NonCopyableNonMovable {
|
||||
Result<T> value = makeError(StatusCode::kUnknown);
|
||||
folly::coro::Baton baton;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct LoadGuard {
|
||||
bool needLoad;
|
||||
std::shared_ptr<SharedFuture<T>> future;
|
||||
|
||||
LoadGuard(bool needLoad, std::shared_ptr<SharedFuture<T>> future)
|
||||
: needLoad(needLoad),
|
||||
future(future) {}
|
||||
|
||||
~LoadGuard() {
|
||||
if (needLoad && !future->baton.ready()) {
|
||||
future->value = makeError(StatusCode::kUnknown, "load failed in BatchContext");
|
||||
future->baton.post();
|
||||
}
|
||||
}
|
||||
|
||||
void set(const Result<T> &r) {
|
||||
assert(!future->baton.ready());
|
||||
future->value = r;
|
||||
future->baton.post();
|
||||
}
|
||||
|
||||
CoTryTask<T> coAwait() {
|
||||
co_await future->baton;
|
||||
co_return future->value;
|
||||
}
|
||||
};
|
||||
|
||||
static folly::ShallowCopyRequestContextScopeGuard create() {
|
||||
return folly::ShallowCopyRequestContextScopeGuard{token(), std::make_unique<BatchContext>()};
|
||||
}
|
||||
|
||||
static inline BatchContext *get() {
|
||||
auto requestContext = folly::RequestContext::try_get();
|
||||
if (LIKELY(requestContext == nullptr)) {
|
||||
return nullptr;
|
||||
}
|
||||
return dynamic_cast<BatchContext *>(requestContext->getContextData(token()));
|
||||
}
|
||||
|
||||
LoadGuard<std::optional<Inode>> loadInode(InodeId inodeId) {
|
||||
return loadImpl<InodeId, std::optional<Inode>>(inodes_, inodeId);
|
||||
}
|
||||
|
||||
LoadGuard<std::optional<DirEntry>> loadDirEntry(InodeId parent, std::string name) {
|
||||
return loadImpl<std::pair<InodeId, std::string>, std::optional<DirEntry>>(entries_, {parent, std::move(name)});
|
||||
}
|
||||
|
||||
bool hasCallback() override { return false; }
|
||||
|
||||
private:
|
||||
static constexpr const char *kTokenName = "hf3fs::meta::server::BatchContext";
|
||||
|
||||
static folly::RequestToken const &token() {
|
||||
static folly::RequestToken const token(kTokenName);
|
||||
return token;
|
||||
}
|
||||
|
||||
template <typename K, typename T>
|
||||
using SynchronizedFutureMap = folly::Synchronized<std::map<K, std::shared_ptr<SharedFuture<T>>>, std::mutex>;
|
||||
|
||||
template <typename K, typename T>
|
||||
LoadGuard<T> loadImpl(SynchronizedFutureMap<K, T> &map, K key) {
|
||||
auto guard = map.lock();
|
||||
auto iter = guard->find(key);
|
||||
if (iter != guard->end()) {
|
||||
return LoadGuard<T>(false, iter->second);
|
||||
}
|
||||
auto future = std::make_shared<SharedFuture<T>>();
|
||||
guard->emplace(std::move(key), future);
|
||||
return LoadGuard<T>(true, future);
|
||||
}
|
||||
|
||||
SynchronizedFutureMap<InodeId, std::optional<Inode>> inodes_;
|
||||
SynchronizedFutureMap<std::pair<InodeId, std::string>, std::optional<DirEntry>> entries_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
327
src/meta/store/DirEntry.cc
Normal file
327
src/meta/store/DirEntry.cc
Normal file
@@ -0,0 +1,327 @@
|
||||
#include "meta/store/DirEntry.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <boost/core/ignore_unused.hpp>
|
||||
#include <cassert>
|
||||
#include <fmt/core.h>
|
||||
#include <fmt/format.h>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/experimental/coro/Collect.h>
|
||||
#include <folly/experimental/coro/CurrentExecutor.h>
|
||||
#include <folly/functional/Partial.h>
|
||||
#include <folly/futures/Future.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <linux/limits.h>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/KeyPrefix.h"
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/MagicEnum.hpp"
|
||||
#include "common/utils/Result.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "meta/store/BatchContext.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
namespace {
|
||||
// Key prefix written first by DirEntry::packKey() and expected first by DirEntry::unpackKey().
constexpr auto prefix = kv::KeyPrefix::Dentry;
|
||||
|
||||
/**
 * Validate a directory entry name.
 * @return true unless the name is empty, is "." or "..", or contains a '/'.
 */
bool checkName(std::string_view name) {
  if (name.empty() || name == "." || name == "..") {
    return false;
  }
  return name.find('/') == std::string_view::npos;
}
|
||||
} // namespace
|
||||
|
||||
/** DirEntry */
|
||||
/**
 * Build the KV key of the entry `name` inside directory `parent`.
 * The key prefix, the packed parent inode id and the raw name bytes are written in that order.
 */
std::string DirEntry::packKey(InodeId parent, std::string_view name) {
  const auto expectedSize = sizeof(prefix) + sizeof(InodeId::Key) + name.size();
  String key;
  key.reserve(expectedSize);

  Serializer writer{key};
  writer.put(prefix);
  writer.put(parent.packKey());
  writer.putRaw(name.data(), name.size());
  return key;
}
|
||||
|
||||
// Key of this entry, built from its own parent and name.
std::string DirEntry::packKey() const { return packKey(parent, name); }
|
||||
|
||||
/**
 * Parse a key produced by packKey() and fill in `parent` and `name`.
 * The prefix is only verified by an assert; a deserialization failure is returned as an error.
 */
Result<Void> DirEntry::unpackKey(const std::string_view key) {
  // todo: log more data
  Deserializer reader(key);

  auto keyPrefix = reader.get<kv::KeyPrefix>();
  RETURN_ON_ERROR(keyPrefix);
  assert(keyPrefix.value() == prefix);

  auto packedParent = reader.get<InodeId::Key>();
  RETURN_ON_ERROR(packedParent);
  parent = InodeId::unpackKey(packedParent.value());

  // everything after the parent id is the entry name
  auto remaining = reader.getRawUntilEnd();
  RETURN_ON_ERROR(remaining);
  name = *remaining;

  return Void();
}
|
||||
|
||||
/**
 * Rebuild a DirEntry from a raw key/value pair.
 * @return the entry, or kDataCorruption when either the key or the value cannot be deserialized
 *         (both cases are logged as CRITICAL).
 */
Result<DirEntry> DirEntry::newUnpacked(const std::string_view key, const std::string_view value) {
  DirEntry entry;
  if (auto result = entry.unpackKey(key); result.hasError()) {
    // dump the key as comma separated hex bytes; keep the bytes const while doing so
    const auto *keyBytes = reinterpret_cast<const uint8_t *>(key.data());
    auto formattedKey = fmt::format("{:02x}", fmt::join(keyBytes, keyBytes + key.length(), ","));
    XLOGF(CRITICAL,
          "Failed to deserialize dirEntry key {}, error {}, data corruption!!!",
          formattedKey,
          result.error());
    return makeError(StatusCode::kDataCorruption, fmt::format("deserialize dirEntry key {} failed", formattedKey));
  }
  if (auto des = serde::deserialize(entry.data(), value); des.hasError()) {
    XLOGF(CRITICAL,
          "Failed to deserialize dirEntry {}/{}, {}, data corruption!!!",
          entry.parent,
          entry.name,
          des.error());
    return makeError(StatusCode::kDataCorruption);
  }
  return std::move(entry);
}
|
||||
|
||||
template <const bool SNAPSHOT>
|
||||
CoTryTask<std::optional<DirEntry>> DirEntry::loadImpl(IReadOnlyTransaction &txn,
|
||||
InodeId parent,
|
||||
std::string_view name) {
|
||||
auto func = SNAPSHOT ? &IReadOnlyTransaction::snapshotGet : &IReadOnlyTransaction::get;
|
||||
if (name.size() > NAME_MAX) {
|
||||
XLOGF(DBG, "length of name {} > {}", name, NAME_MAX);
|
||||
co_return makeError(MetaCode::kNameTooLong, fmt::format("{} > {}", name, NAME_MAX));
|
||||
}
|
||||
auto result = co_await (txn.*func)(packKey(parent, name));
|
||||
if (result.hasError()) {
|
||||
XLOGF(ERR, "Failed to load dirEntry {}/{}, error {}", parent, name, result.error());
|
||||
CO_RETURN_ERROR(result);
|
||||
}
|
||||
if (auto &value = *result; value.has_value()) {
|
||||
DirEntry entry(parent, std::string(name), {});
|
||||
#ifndef NDEBUG
|
||||
entry.snapshotLoaded_ = SNAPSHOT;
|
||||
#endif
|
||||
if (auto des = serde::deserialize(entry.data(), *value); des.hasError()) {
|
||||
XLOGF(CRITICAL, "Failed to deserialize dirEntry {}/{}, {}, data corruption!!", parent, name, des.error());
|
||||
co_return makeError(StatusCode::kDataCorruption);
|
||||
}
|
||||
co_return std::move(entry);
|
||||
} else {
|
||||
co_return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Snapshot-read a directory entry, sharing the load with other requests of the same batch.
 *
 * Without a BatchContext this is a plain snapshot load. With one, the first caller for a given
 * (parent, name) performs the load and publishes it through the guard; the others wait for it.
 */
CoTryTask<std::optional<DirEntry>> DirEntry::snapshotLoad(IReadOnlyTransaction &txn,
                                                          InodeId parent,
                                                          std::string_view name) {
  auto *batch = BatchContext::get();
  if (batch == nullptr) {
    co_return co_await loadImpl<true>(txn, parent, name);
  }

  auto guard = batch->loadDirEntry(parent, std::string(name));
  if (!guard.needLoad) {
    co_return co_await guard.coAwait();
  }

  auto loaded = co_await loadImpl<true>(txn, parent, name);
  guard.set(loaded);
  co_return std::move(loaded);
}
|
||||
|
||||
// Non-snapshot load of the entry `name` in directory `parent`; see loadImpl<false>.
CoTryTask<std::optional<DirEntry>> DirEntry::load(IReadOnlyTransaction &txn, InodeId parent, std::string_view name) {
|
||||
co_return co_await loadImpl<false>(txn, parent, name);
|
||||
}
|
||||
|
||||
/**
 * Write this entry into the transaction.
 *
 * Rejected before writing: an invalid name (kFoundBug), a name longer than NAME_MAX (kNameTooLong)
 * and an entry that points at the tree root (kFoundBug). A failing txn.set() is logged and returned.
 */
CoTryTask<void> DirEntry::store(IReadWriteTransaction &txn) const {
  assert(!snapshotLoaded_);

  const bool nameOk = checkName(name);
  if (UNLIKELY(!nameOk)) {
    XLOGF(DFATAL, "DirEntry name {} is invalid, should never happen!!!", name);
    co_return makeError(MetaCode::kFoundBug, fmt::format("Invalid DirEntry name {}!", name));
  }
  if (UNLIKELY(name.size() > NAME_MAX)) {
    co_return makeError(MetaCode::kNameTooLong, fmt::format("name {} len > {}", name, NAME_MAX));
  }
  if (UNLIKELY(id.isTreeRoot())) {
    XLOGF(DFATAL, "DirEntry {} points to tree root, should never happen!!!", *this);
    co_return makeError(MetaCode::kFoundBug, fmt::format("DirEntry {} points to tree root", *this));
  }

  auto packedKey = packKey();
  auto payload = serde::serialize(data());
  auto written = co_await txn.set(packedKey, payload);
  if (written.hasError()) {
    XLOGF(ERR, "Failed to store dirEntry {}, error {}", *this, written.error());
    co_return written;
  }

  XLOGF(DBG, "DirEntry store {}/{}, {}", parent, name, id);
  co_return Void{};
}
|
||||
|
||||
/**
 * Clear this entry's key in the transaction.
 *
 * @param ignoreSnapshotCheck  only consulted by the debug assertion: allows removing an entry
 *                             that was loaded through a snapshot read.
 * @return the result of txn.clear(), or kInvalidArg when the entry name is invalid.
 */
CoTryTask<void> DirEntry::remove(IReadWriteTransaction &txn, bool ignoreSnapshotCheck) const {
  assert(!snapshotLoaded_ || ignoreSnapshotCheck);
  boost::ignore_unused(ignoreSnapshotCheck);

  if (LIKELY(checkName(name))) {
    XLOGF(DBG, "Remove direntry {}/{}", parent, name);
    co_return co_await txn.clear(packKey());
  }

  XLOGF(DFATAL, "DirEntry name {} is invalid, should never happen!!!", name);
  co_return makeError(StatusCode::kInvalidArg, fmt::format("Invalid DirEntry name {}!", name));
}
|
||||
|
||||
/**
 * Turn the optional inode loaded for `entry` into a Result.
 *
 * A present inode is returned as-is. A missing inode is kNotFound for the fake "." / ".." entries
 * and kInconsistent (logged as CRITICAL) for every other entry.
 */
static inline Result<Inode> checkInodeExists(const DirEntry &entry, std::optional<Inode> result) {
  if (result.has_value()) {
    return std::move(*result);
  }

  const bool fakeEntry = entry.name == "." || entry.name == "..";
  if (fakeEntry) {
    // "." and ".." are fake dirEntries, so the inode may legitimately not exist.
    XLOGF(DBG, "Inode of entry {} doesn't exist", entry);
    return makeError(MetaCode::kNotFound);
  }

  auto detail = fmt::format("DirEntry {} exists, but Inode not found", entry);
  XLOGF(CRITICAL, "Metadata inconsistent: {}!!!", detail);
  return makeError(MetaCode::kInconsistent, std::move(detail));
}
|
||||
|
||||
/**
 * Verify that the loaded inode has the type recorded in `entry`.
 *
 * Returns the inode unchanged on a match; on a mismatch logs at CRITICAL and returns
 * MetaCode::kInconsistent with a message naming both types.
 */
static inline Result<Inode> checkInodeType(const DirEntry &entry, Inode inode) {
  const auto actualType = inode.getType();
  if (UNLIKELY(actualType != entry.type)) {
    auto detail = fmt::format("DirEntry {}/{} -> {} found, but InodeType mismatch {} != {}",
                              entry.parent,
                              entry.name,
                              entry.id,
                              magic_enum::enum_name(actualType),
                              magic_enum::enum_name(entry.type));
    XLOGF(CRITICAL, "Metadata inconsistent: {}!!!", detail);
    return makeError(MetaCode::kInconsistent, std::move(detail));
  }
  return std::move(inode);
}
|
||||
|
||||
template <typename Txn, CoTryTask<std::optional<Inode>> (*LoadFunc)(Txn &, InodeId id)>
|
||||
static CoTryTask<Inode> loadInodeFromDirEntry(Txn &txn, const DirEntry &entry) {
|
||||
co_return (co_await (*LoadFunc)(txn, entry.id))
|
||||
.then(folly::partial(checkInodeExists, entry))
|
||||
.then(folly::partial(checkInodeType, entry));
|
||||
}
|
||||
|
||||
/** Load and validate the inode of this entry via Inode::load. */
CoTryTask<Inode> DirEntry::loadInode(IReadOnlyTransaction &txn) const {
  auto inode = co_await loadInodeFromDirEntry<IReadOnlyTransaction, &Inode::load>(txn, *this);
  co_return inode;
}
|
||||
|
||||
/** Load and validate the inode of this entry via Inode::snapshotLoad. */
CoTryTask<Inode> DirEntry::snapshotLoadInode(IReadOnlyTransaction &txn) const {
  auto inode = co_await loadInodeFromDirEntry<IReadOnlyTransaction, &Inode::snapshotLoad>(txn, *this);
  co_return inode;
}
|
||||
|
||||
/** DirEntryList */
|
||||
template <const bool SNAPSHOT>
|
||||
CoTryTask<DirEntryList> DirEntryList::loadImpl(IReadOnlyTransaction &txn,
|
||||
InodeId parent,
|
||||
IReadOnlyTransaction::KeySelector begin,
|
||||
IReadOnlyTransaction::KeySelector end,
|
||||
int32_t limit,
|
||||
bool loadInodes,
|
||||
size_t loadInodesConcurrent) {
|
||||
auto func = SNAPSHOT ? &IReadOnlyTransaction::snapshotGetRange : &IReadOnlyTransaction::getRange;
|
||||
auto result = co_await (txn.*func)(begin, end, limit > 0 ? limit : 128);
|
||||
CO_RETURN_ON_ERROR(result);
|
||||
bool more = result->hasMore;
|
||||
|
||||
std::vector<DirEntry> entries;
|
||||
for (auto &kv : result->kvs) {
|
||||
const auto &[key, value] = kv.pair();
|
||||
auto entry = DirEntry::newUnpacked(key, value);
|
||||
CO_RETURN_ON_ERROR(entry);
|
||||
XLOGF_IF(FATAL, entry->parent != parent, "DirEntryList::load {}, get entry {}", parent, *entry);
|
||||
#ifndef NDEBUG
|
||||
entry->snapshotLoaded_ = SNAPSHOT;
|
||||
#endif
|
||||
entries.push_back(std::move(entry.value()));
|
||||
}
|
||||
|
||||
if (!loadInodes) {
|
||||
co_return DirEntryList{std::move(entries), {}, more};
|
||||
}
|
||||
if (loadInodesConcurrent <= 0) {
|
||||
loadInodesConcurrent = 8;
|
||||
}
|
||||
|
||||
auto exec = co_await folly::coro::co_current_executor;
|
||||
std::vector<Inode> inodes;
|
||||
auto iter = entries.begin();
|
||||
while (iter != entries.end()) {
|
||||
std::vector<folly::SemiFuture<Result<Inode>>> tasks;
|
||||
while (iter != entries.end() && tasks.size() < loadInodesConcurrent) {
|
||||
auto &entry = *iter;
|
||||
auto func = SNAPSHOT ? &DirEntry::snapshotLoadInode : &DirEntry::loadInode;
|
||||
tasks.push_back((entry.*func)(txn).scheduleOn(exec).start());
|
||||
iter++;
|
||||
}
|
||||
auto results = co_await folly::coro::collectAllRange(std::move(tasks));
|
||||
for (auto result : results) {
|
||||
XLOGF_IF(INFO, result.hasError(), "here error {}", result.error());
|
||||
CO_RETURN_ON_ERROR(result);
|
||||
inodes.push_back(*result);
|
||||
}
|
||||
}
|
||||
|
||||
co_return DirEntryList{std::move(entries), std::move(inodes), more};
|
||||
}
|
||||
|
||||
/**
 * Snapshot-list entries of `parent`, starting from the key packed from `prev` and ending at the end
 * of the directory's key prefix. Remaining arguments are forwarded to loadImpl<true>.
 */
CoTryTask<DirEntryList> DirEntryList::snapshotLoad(IReadOnlyTransaction &txn,
                                                   InodeId parent,
                                                   std::string_view prev,
                                                   int32_t limit,
                                                   bool loadInodes,
                                                   size_t loadInodesConcurrent) {
  // The end bound is derived from the directory prefix (empty name), not from `prev`.
  const std::string dirPrefix = DirEntry::packKey(parent, "");
  const std::string lastKey = kv::TransactionHelper::prefixListEndKey(dirPrefix);
  const std::string firstKey = DirEntry::packKey(parent, prev);

  IReadOnlyTransaction::KeySelector selBegin{firstKey, false};
  IReadOnlyTransaction::KeySelector selEnd{lastKey, false};
  auto listed = co_await loadImpl<true>(txn, parent, selBegin, selEnd, limit, loadInodes, loadInodesConcurrent);
  co_return listed;
}
|
||||
|
||||
/**
 * Snapshot-list entries of `parent` between the keys packed from `begin` and `end`.
 *
 * Both bounds are passed as KeySelector{key, false}. The listing goes through loadImpl<true>, i.e.
 * snapshotGetRange / snapshotLoadInode, matching the other snapshotLoad overload; entries are also
 * marked as snapshot-loaded in debug builds.
 */
CoTryTask<DirEntryList> DirEntryList::snapshotLoad(IReadOnlyTransaction &txn,
                                                   InodeId parent,
                                                   std::string_view begin,
                                                   std::string_view end,
                                                   int32_t limit,
                                                   bool loadInodes,
                                                   size_t loadInodesConcurrent) {
  std::string beginKey = DirEntry::packKey(parent, begin);
  std::string endKey = DirEntry::packKey(parent, end);
  IReadOnlyTransaction::KeySelector selBegin{beginKey, false};
  IReadOnlyTransaction::KeySelector selEnd{endKey, false};
  // Must be the SNAPSHOT=true instantiation: this is the snapshot API, not a regular range read.
  co_return co_await loadImpl<true>(txn, parent, selBegin, selEnd, limit, loadInodes, loadInodesConcurrent);
}
|
||||
|
||||
/**
 * List entries of `parent` with a regular (non-snapshot) range read, starting from the key packed
 * from `prev` up to the end of the directory's key prefix. Inodes are not loaded.
 */
CoTryTask<DirEntryList> DirEntryList::load(IReadWriteTransaction &txn,
                                           InodeId parent,
                                           std::string_view prev,
                                           int32_t limit) {
  const std::string dirPrefix = DirEntry::packKey(parent, "");
  const std::string lastKey = kv::TransactionHelper::prefixListEndKey(dirPrefix);
  const std::string firstKey = DirEntry::packKey(parent, prev);

  IReadOnlyTransaction::KeySelector selBegin{firstKey, false};
  IReadOnlyTransaction::KeySelector selEnd{lastKey, false};
  auto listed = co_await loadImpl<false>(txn, parent, selBegin, selEnd, limit, false, 8);
  co_return listed;
}
|
||||
|
||||
/**
 * Report whether directory `parent` has no entries.
 *
 * Issues a regular getRange over the directory's key prefix with limit 1; the directory counts as
 * empty only if no key came back and the range does not report more data.
 */
CoTryTask<bool> DirEntryList::checkEmpty(IReadWriteTransaction &txn, InodeId parent) {
  const auto dirPrefix = DirEntry::packKey(parent, "");
  const auto lastKey = kv::TransactionHelper::prefixListEndKey(dirPrefix);
  IReadWriteTransaction::KeySelector selBegin(dirPrefix, false);
  IReadWriteTransaction::KeySelector selEnd(lastKey, false);

  auto probe = co_await txn.getRange(selBegin, selEnd, 1);
  CO_RETURN_ON_ERROR(probe);

  const bool sawAnything = !probe->kvs.empty() || probe->hasMore;
  co_return !sawAnything;
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
214
src/meta/store/DirEntry.h
Normal file
214
src/meta/store/DirEntry.h
Normal file
@@ -0,0 +1,214 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/Expected.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/SerDeser.h"
|
||||
#include "fbs/core/user/User.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "meta/store/Inode.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
using hf3fs::kv::IReadOnlyTransaction;
|
||||
using hf3fs::kv::IReadWriteTransaction;
|
||||
|
||||
struct DirEntryList;
|
||||
|
||||
class DirEntry : public meta::DirEntry {
|
||||
public:
|
||||
using Base = meta::DirEntry;
|
||||
using Base::Base;
|
||||
|
||||
DirEntry(Base base)
|
||||
: Base(std::move(base)) {}
|
||||
|
||||
static Result<DirEntry> newUnpacked(const std::string_view key, const std::string_view value);
|
||||
|
||||
static CoTryTask<std::optional<DirEntry>> snapshotLoad(IReadOnlyTransaction &txn,
|
||||
InodeId parent,
|
||||
std::string_view name);
|
||||
|
||||
static CoTryTask<std::optional<DirEntry>> load(IReadOnlyTransaction &txn, InodeId parent, std::string_view name);
|
||||
|
||||
static CoTryTask<bool> checkExist(IReadOnlyTransaction &txn, InodeId parent, std::string_view name) {
|
||||
co_return (co_await DirEntry::load(txn, parent, name)).then([](auto &v) { return v.has_value(); });
|
||||
}
|
||||
|
||||
static DirEntry newFile(InodeId parent, std::string name, InodeId inode) {
|
||||
return meta::DirEntry(parent, name, {inode, InodeType::File});
|
||||
}
|
||||
static DirEntry newSymlink(InodeId parent, std::string name, InodeId inode) {
|
||||
return meta::DirEntry(parent, name, {inode, InodeType::Symlink});
|
||||
}
|
||||
static DirEntry newDirectory(InodeId parent, std::string name, InodeId inode, Acl acl) {
|
||||
return meta::DirEntry(parent, name, {inode, InodeType::Directory, acl});
|
||||
}
|
||||
static DirEntry root() {
|
||||
return meta::DirEntry(InodeId::root(), ".", {InodeId::root(), InodeType::Directory, Acl::root()});
|
||||
}
|
||||
|
||||
/** Key format: prefix + parent-InodeId.key + name */
|
||||
std::string packKey() const;
|
||||
static std::string packKey(InodeId parent, std::string_view name);
|
||||
Result<Void> unpackKey(const std::string_view key);
|
||||
|
||||
// load inode from dir entry
|
||||
CoTryTask<Inode> loadInode(IReadOnlyTransaction &txn) const;
|
||||
CoTryTask<Inode> snapshotLoadInode(IReadOnlyTransaction &txn) const;
|
||||
CoTryTask<void> addIntoReadConflict(IReadWriteTransaction &txn) const {
|
||||
#ifndef NDEBUG
|
||||
snapshotLoaded_ = false;
|
||||
#endif
|
||||
co_return co_await txn.addReadConflict(packKey());
|
||||
}
|
||||
CoTryTask<void> store(IReadWriteTransaction &txn) const;
|
||||
CoTryTask<void> remove(IReadWriteTransaction &txn, bool ignoreSnapshotCheck = false) const;
|
||||
|
||||
private:
|
||||
friend struct DirEntryList;
|
||||
friend class MetaTestHelper;
|
||||
|
||||
template <const bool SNAPSHOT>
|
||||
static CoTryTask<std::optional<DirEntry>> loadImpl(IReadOnlyTransaction &txn, InodeId parent, std::string_view name);
|
||||
|
||||
#ifndef NDEBUG
|
||||
mutable bool snapshotLoaded_ = false;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct DirEntryList {
|
||||
std::vector<DirEntry> entries;
|
||||
std::vector<Inode> inodes;
|
||||
bool more;
|
||||
|
||||
// (prev, end)
|
||||
static CoTryTask<DirEntryList> snapshotLoad(IReadOnlyTransaction &txn,
|
||||
InodeId parent,
|
||||
std::string_view prev,
|
||||
int32_t limit,
|
||||
bool loadInodes = false,
|
||||
size_t loadInodesConcurrent = 0);
|
||||
|
||||
// (begin, end)
|
||||
static CoTryTask<DirEntryList> snapshotLoad(IReadOnlyTransaction &txn,
|
||||
InodeId parent,
|
||||
std::string_view begin,
|
||||
std::string_view end,
|
||||
int32_t limit,
|
||||
bool loadInodes = false,
|
||||
size_t loadInodesConcurrent = 0);
|
||||
|
||||
static CoTryTask<DirEntryList> load(IReadWriteTransaction &txn, InodeId parent, std::string_view prev, int32_t limit);
|
||||
|
||||
static CoTryTask<bool> checkEmpty(IReadWriteTransaction &txn, InodeId parent);
|
||||
|
||||
// For recursive remove and move to the trash, permission checks are required.
|
||||
// However, because the directory may be very large, we may not able to check permissions for entire
|
||||
// directory tree. This method is best effort.
|
||||
static CoTryTask<Void> recursiveCheckRmPerm(IReadWriteTransaction &txn,
|
||||
InodeId parent,
|
||||
flat::UserInfo user,
|
||||
int32_t limit,
|
||||
size_t listBatchSize) {
|
||||
static monitor::CountRecorder failed("meta_server.recursive_check_rm_perm_failed");
|
||||
auto guard = folly::makeGuard([&]() {
|
||||
failed.addSample(1, {{"uid", folly::to<std::string>(user.uid.toUnderType())}});
|
||||
});
|
||||
|
||||
auto queue = std::queue<InodeId>();
|
||||
queue.push(parent);
|
||||
while (!queue.empty()) {
|
||||
auto currDir = queue.front();
|
||||
queue.pop();
|
||||
|
||||
auto prev = std::string();
|
||||
auto foundDir = false;
|
||||
auto numEntries = 0;
|
||||
while (true) {
|
||||
if (limit-- <= 0) {
|
||||
break;
|
||||
}
|
||||
auto list = co_await DirEntryList::snapshotLoad(txn, currDir, prev, std::max(listBatchSize, 32ul));
|
||||
CO_RETURN_ON_ERROR(list);
|
||||
numEntries += list->entries.size();
|
||||
for (auto &entry : list->entries) {
|
||||
prev = entry.name;
|
||||
if (!entry.isDirectory()) {
|
||||
continue;
|
||||
}
|
||||
foundDir = true;
|
||||
if ((int64_t)queue.size() < limit) {
|
||||
queue.push(entry.id);
|
||||
}
|
||||
auto &acl = *entry.dirAcl;
|
||||
if (auto res = acl.checkRecursiveRmPerm(user, false); res.hasError()) {
|
||||
auto msg = fmt::format("user {} recursive remove {}, found {} without permission, msg {}",
|
||||
user.uid,
|
||||
parent,
|
||||
entry,
|
||||
res.error().message());
|
||||
XLOG(ERR, msg);
|
||||
co_return makeError(MetaCode::kNoPermission, msg);
|
||||
}
|
||||
}
|
||||
if (!list->more || (numEntries > 1024 && !foundDir)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
guard.dismiss();
|
||||
co_return Void{};
|
||||
}
|
||||
|
||||
DirEntry &entry(size_t i) { return entries.at(i); }
|
||||
const DirEntry &entry(size_t i) const { return entries.at(i); }
|
||||
|
||||
const Inode &inode(size_t i) const { return inodes.at(i); }
|
||||
Inode &inode(size_t i) { return inodes.at(i); }
|
||||
|
||||
operator ListRsp() && {
|
||||
ListRsp rsp;
|
||||
rsp.more = more;
|
||||
rsp.entries.reserve(entries.size());
|
||||
rsp.inodes.reserve(inodes.size());
|
||||
for (auto &entry : entries) {
|
||||
rsp.entries.emplace_back(std::move(entry));
|
||||
}
|
||||
for (auto &inode : inodes) {
|
||||
rsp.inodes.emplace_back(std::move(inode));
|
||||
}
|
||||
return rsp;
|
||||
}
|
||||
|
||||
private:
|
||||
template <const bool SNAPSHOT>
|
||||
static CoTryTask<DirEntryList> loadImpl(IReadOnlyTransaction &txn,
|
||||
InodeId parent,
|
||||
IReadOnlyTransaction::KeySelector begin,
|
||||
IReadOnlyTransaction::KeySelector end,
|
||||
int32_t limit,
|
||||
bool loadInodes,
|
||||
size_t loadInodesConcurrent);
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
280
src/meta/store/FileSession.cc
Normal file
280
src/meta/store/FileSession.cc
Normal file
@@ -0,0 +1,280 @@
|
||||
#include "meta/store/FileSession.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/KeyPrefix.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/SerDeser.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
|
||||
// todo: move into meta/utils
|
||||
#define FMT_KEY(key) fmt::join((const uint8_t *)key.data(), (const uint8_t *)key.data() + key.size(), ",")
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
namespace {
|
||||
|
||||
struct SessionByInode {
|
||||
static constexpr auto keyPrefix = kv::KeyPrefix::InodeSession;
|
||||
|
||||
static std::string prefixOf(const InodeId inodeId) { return Serializer::serRawArgs(keyPrefix, inodeId.packKey()); }
|
||||
static std::string packKey(InodeId inodeId, Uuid sessionId) {
|
||||
return Serializer::serRawArgs(keyPrefix, inodeId.packKey(), sessionId);
|
||||
}
|
||||
static std::pair<InodeId, Uuid> unpackKey(std::string_view key) {
|
||||
kv::KeyPrefix prefix;
|
||||
InodeId::Key inodeId;
|
||||
Uuid sessionId;
|
||||
auto result = Deserializer::deserRawArgs(key, prefix, inodeId, sessionId);
|
||||
XLOGF_IF(DFATAL, result.hasError(), "Failed to unpack key {:02x}, err {}", FMT_KEY(key), result.error());
|
||||
XLOGF_IF(DFATAL,
|
||||
prefix != keyPrefix,
|
||||
"SessionByInode prefix not match {} != {}",
|
||||
(uint32_t)prefix,
|
||||
(uint32_t)keyPrefix);
|
||||
return {InodeId::unpackKey(inodeId), sessionId};
|
||||
}
|
||||
|
||||
static Result<FileSession> unpack(std::string_view key, std::string_view value) {
|
||||
auto [inodeId, sessionId] = unpackKey(key);
|
||||
FileSession session;
|
||||
RETURN_ON_ERROR(serde::deserialize(session, value));
|
||||
if (session.inodeId != inodeId || session.sessionId != sessionId) {
|
||||
XLOGF(DFATAL,
|
||||
"SessionByInode KV not match, key {} -> {} {}, value {}",
|
||||
FMT_KEY(key),
|
||||
inodeId,
|
||||
sessionId,
|
||||
session);
|
||||
return makeError(StatusCode::kDataCorruption);
|
||||
}
|
||||
return session;
|
||||
}
|
||||
};
|
||||
|
||||
// struct SessionByClient {
|
||||
// static constexpr auto keyPrefix = kv::KeyPrefix::ClientSession;
|
||||
// static std::string prefixOf(const Uuid &clientId) { return Serializer::serRawArgs(keyPrefix, clientId); }
|
||||
// static std::string packKey(const Uuid &clientId, const Uuid &sessionId) {
|
||||
// return Serializer::serRawArgs(keyPrefix, clientId, sessionId);
|
||||
// }
|
||||
// static std::pair<Uuid, Uuid> unpackKey(std::string_view key) {
|
||||
// kv::KeyPrefix prefix;
|
||||
// Uuid clientId;
|
||||
// Uuid sessionId;
|
||||
// auto result = Deserializer::deserRawArgs(key, prefix, clientId, sessionId);
|
||||
// XLOGF_IF(DFATAL, result.hasError(), "Failed to unpack key {:02x}, err {}", FMT_KEY(key), result.error());
|
||||
// XLOGF_IF(DFATAL,
|
||||
// prefix != keyPrefix,
|
||||
// "SessionByInode prefix not match {} != {}",
|
||||
// (uint32_t)prefix,
|
||||
// (uint32_t)keyPrefix);
|
||||
// return {clientId, sessionId};
|
||||
// }
|
||||
// static Result<FileSession> unpack(std::string_view key, std::string_view value) {
|
||||
// auto [clientId, sessionId] = unpackKey(key);
|
||||
// FileSession session;
|
||||
// RETURN_ON_ERROR(serde::deserialize(session, value));
|
||||
// if (session.clientId.uuid != clientId || session.sessionId != sessionId) {
|
||||
// XLOGF(DFATAL,
|
||||
// "SessionByClient KV not match, key {} -> {} {}, value {}",
|
||||
// FMT_KEY(key),
|
||||
// clientId,
|
||||
// sessionId,
|
||||
// session);
|
||||
// return makeError(StatusCode::kDataCorruption);
|
||||
// }
|
||||
// return session;
|
||||
// }
|
||||
// };
|
||||
|
||||
template <typename SessionType, typename Id>
|
||||
CoTryTask<std::vector<FileSession>> listSessions(IReadOnlyTransaction &txn, Id id, bool snapshot, size_t limit) {
|
||||
auto prefix = SessionType::prefixOf(id);
|
||||
auto unpack = SessionType::unpack;
|
||||
auto options = kv::TransactionHelper::ListByPrefixOptions().withSnapshot(snapshot).withLimit(limit);
|
||||
co_return co_await kv::TransactionHelper::listByPrefix<FileSession>(txn, prefix, options, unpack);
|
||||
}
|
||||
|
||||
template <typename SessionType, typename Id>
|
||||
CoTryTask<std::optional<FileSession>> loadSession(IReadOnlyTransaction &txn, Id id, Uuid sessionId) {
|
||||
XLOGF(DBG, "Load session {}, {}", id, sessionId);
|
||||
auto key = SessionType::packKey(id, sessionId);
|
||||
auto value = co_await txn.get(key);
|
||||
CO_RETURN_ON_ERROR(value);
|
||||
if (value->has_value()) {
|
||||
auto result = SessionType::unpack(key, **value);
|
||||
CO_RETURN_ON_ERROR(result);
|
||||
XLOGF(DBG, "Load session found {}", *result);
|
||||
co_return result;
|
||||
}
|
||||
co_return std::nullopt;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/** FileSession */
|
||||
/** Key prefix shared by all sessions of `inodeId` (delegates to SessionByInode). */
std::string FileSession::prefix(InodeId inodeId) {
  auto keyPrefix = SessionByInode::prefixOf(inodeId);
  return keyPrefix;
}
|
||||
|
||||
/** Full KV key of session `sessionId` on `inodeId` (delegates to SessionByInode). */
std::string FileSession::packKey(InodeId inodeId, Uuid sessionId) {
  auto key = SessionByInode::packKey(inodeId, sessionId);
  return key;
}
|
||||
|
||||
/**
 * Decode a by-inode session key into (inode, session).
 * SessionByInode::unpackKey only logs decode problems, so the pair is always returned as a value.
 */
Result<std::pair<InodeId, Uuid>> FileSession::unpackByInodeKey(std::string_view key) {
  std::pair<InodeId, Uuid> ids = SessionByInode::unpackKey(key);
  return ids;
}
|
||||
|
||||
/**
 * Deserialize a session from `value`. `key` is used for the failure log only; it is not checked
 * against the decoded session here. The payload field is reset to an empty string on success.
 */
Result<FileSession> FileSession::unpack(std::string_view key, std::string_view value) {
  FileSession session;
  auto decoded = serde::deserialize(session, value);
  if (!decoded.hasError()) {
    session.payload = "";
    return session;
  }

  XLOGF(DFATAL,
        "FileSession unpack failed, key {}, value {}, error {}",
        FMT_KEY(key),
        FMT_KEY(value),
        decoded.error());
  RETURN_ERROR(decoded);
}
|
||||
|
||||
/** Load session `session` of `inodeId` through the by-inode index. */
CoTryTask<std::optional<FileSession>> FileSession::load(IReadOnlyTransaction &txn, InodeId inodeId, Uuid session) {
  auto loaded = co_await loadSession<SessionByInode, InodeId>(txn, inodeId, session);
  co_return loaded;
}
|
||||
|
||||
/** List sessions of `inodeId` through the by-inode index; `snapshot` and `limit` are forwarded. */
CoTryTask<std::vector<FileSession>> FileSession::list(IReadOnlyTransaction &txn,
                                                      InodeId inodeId,
                                                      bool snapshot,
                                                      size_t limit) {
  auto sessions = co_await listSessions<SessionByInode>(txn, inodeId, snapshot, limit);
  co_return sessions;
}
|
||||
|
||||
/**
 * Check whether any session exists for `inodeId`.
 *
 * The lookup itself is a snapshot read (snapshotCheckExists). When no session is found, the
 * inode's session key range is added to the read-conflict set of `txn`.
 *
 * @return one existing session, std::nullopt when there is none, or the first error encountered.
 */
CoTryTask<std::optional<FileSession>> FileSession::checkExists(IReadWriteTransaction &txn, const InodeId inodeId) {
  auto exists = co_await snapshotCheckExists(dynamic_cast<IReadOnlyTransaction &>(txn), inodeId);
  CO_RETURN_ON_ERROR(exists);

  // `exists` is a Result wrapping an optional. After the error check the Result always holds a
  // value, so the "no session" case must be detected on the inner optional.
  if (!exists->has_value()) {
    // NOTE: add range into read conflict set
    auto prefix = SessionByInode::prefixOf(inodeId);
    auto end = kv::TransactionHelper::prefixListEndKey(prefix);
    CO_RETURN_ON_ERROR(co_await txn.addReadConflictRange(kv::TransactionHelper::keyAfter(prefix), end));
  }

  co_return exists;
}
|
||||
|
||||
/**
 * Snapshot-read at most one session key of `inodeId`.
 *
 * The range read is repeated for as long as it returns no keys but still reports hasMore.
 *
 * @return the decoded first session, std::nullopt when the range is empty, or an error.
 */
CoTryTask<std::optional<FileSession>> FileSession::snapshotCheckExists(IReadOnlyTransaction &txn,
                                                                       const InodeId inodeId) {
  auto prefix = SessionByInode::prefixOf(inodeId);
  auto end = kv::TransactionHelper::prefixListEndKey(prefix);

  auto range = co_await txn.snapshotGetRange({prefix, false}, {end, false}, 1);
  CO_RETURN_ON_ERROR(range);
  XLOGF(DBG, "Check session for inodeId {}, cnt {} hasMore {}", inodeId, range->kvs.size(), range->hasMore);

  while (range->kvs.empty() && range->hasMore) {
    range = co_await txn.snapshotGetRange({prefix, false}, {end, false}, 1);
    CO_RETURN_ON_ERROR(range);
    XLOGF(DBG, "Check session for inodeId {}, cnt {} hasMore {}", inodeId, range->kvs.size(), range->hasMore);
  }

  if (range->kvs.empty()) {
    co_return std::nullopt;
  }
  auto &first = range->kvs.at(0);
  co_return SessionByInode::unpack(first.key, first.value);
}
|
||||
|
||||
/**
 * Remove every session of `inodeId`: list them with a regular (non-snapshot) read, then clear each.
 * Stops at, and returns, the first error.
 */
CoTryTask<void> FileSession::removeAll(IReadWriteTransaction &txn, InodeId inodeId) {
  /* todo: it may not be possible to remove all sessions in one transaction */
  XLOGF(DBG, "SessionManager remove all sessions for {}", inodeId);

  auto listed = co_await list(txn, inodeId, false);
  CO_RETURN_ON_ERROR(listed);

  for (auto iter = listed->begin(); iter != listed->end(); ++iter) {
    const auto &session = *iter;
    CO_RETURN_ON_ERROR(co_await session.remove(txn));
  }

  XLOGF_IF(DBG, !listed->empty(), "SessionManager remove {} sessions of inodeId {}.", listed->size(), inodeId);
  co_return Void{};
}
|
||||
|
||||
/** Serialize this session and write it under its by-inode key. */
CoTryTask<void> FileSession::store(IReadWriteTransaction &txn) const {
  XLOGF(DBG, "Store session {}", *this);

  // TODO: what if two clients generate the same sessionId? should we check its existence first?
  auto keyByInode = SessionByInode::packKey(inodeId, sessionId);
  auto value = serde::serialize(*this);
  CO_RETURN_ON_ERROR(co_await txn.set(keyByInode, value));
  // auto keyByClient = SessionByClient::packKey(clientId.uuid, sessionId);
  // CO_RETURN_ON_ERROR(co_await txn.set(keyByClient, value));
  co_return Void{};
}
|
||||
|
||||
/** Clear this session's by-inode key from `txn`. */
CoTryTask<void> FileSession::remove(IReadWriteTransaction &txn) const {
  XLOGF(DBG, "Remove session {}", *this);

  const auto keyByInode = SessionByInode::packKey(inodeId, sessionId);
  CO_RETURN_ON_ERROR(co_await txn.clear(keyByInode));
  // auto keyByClient = SessionByClient::packKey(clientId.uuid, sessionId);
  // CO_RETURN_ON_ERROR(co_await txn.clear(keyByClient));

  co_return Void{};
}
|
||||
|
||||
/**
 * Read one batch (at most 512 keys) of sessions belonging to `shard`.
 *
 * @param shard  shard index; values >= kShard yield an empty result.
 * @param prev   last session returned by the previous batch, if any; the scan starts from the
 *               larger of the shard's begin key and prev's key.
 * @return the decoded sessions, an empty vector when the key range is empty, or the first error.
 */
CoTryTask<std::vector<FileSession>> FileSession::scan(IReadOnlyTransaction &txn,
                                                      size_t shard,
                                                      std::optional<FileSession> prev) {
  if (shard >= kShard) {
    co_return std::vector<FileSession>();
  }

  // NOTE(review): the begin bound is built with Uuid::max() and the last-shard end bound with
  // Uuid::zero(); whether that ordering is intended cannot be confirmed from here -- verify.
  auto beginKey = SessionByInode::packKey(InodeId(shard), Uuid::max());
  if (prev) {
    auto prevKey = SessionByInode::packKey(prev->inodeId, prev->sessionId);
    beginKey = std::max(beginKey, prevKey);
  }

  const bool lastShard = shard + 1 >= kShard;
  auto endKey = lastShard ? SessionByInode::packKey(InodeId(~0ULL), Uuid::zero())
                          : SessionByInode::packKey(InodeId(shard + 1), Uuid::zero());
  if (beginKey >= endKey) {
    co_return std::vector<FileSession>();
  }

  auto range = co_await txn.getRange({beginKey, false}, {endKey, false}, 512);
  CO_RETURN_ON_ERROR(range);

  std::vector<FileSession> sessions;
  for (auto &[key, value] : range->kvs) {
    auto decoded = FileSession::unpack(key, value);
    CO_RETURN_ON_ERROR(decoded);
    sessions.push_back(*decoded);
  }
  co_return sessions;
}
|
||||
|
||||
// std::string FileSession::prefix(Uuid clientId) { return SessionByClient::prefixOf(clientId); }
|
||||
//
|
||||
// std::string FileSession::packKey(Uuid clientId, Uuid sessionId) {
|
||||
// return SessionByClient::packKey(clientId, sessionId);
|
||||
// }
|
||||
//
|
||||
// Result<std::pair<Uuid, Uuid>> FileSession::unpackByClientKey(std::string_view key) {
|
||||
// return SessionByClient::unpackKey(key);
|
||||
// }
|
||||
//
|
||||
// CoTryTask<std::optional<FileSession>> FileSession::load(IReadOnlyTransaction &txn, ClientId clientId, Uuid session) {
|
||||
// co_return co_await loadSession<SessionByClient, Uuid>(txn, clientId.uuid, session);
|
||||
// }
|
||||
//
|
||||
// CoTryTask<std::vector<FileSession>> FileSession::list(IReadOnlyTransaction &txn, Uuid clientId, bool snapshot) {
|
||||
// co_return co_await listSessions<SessionByClient>(txn, clientId, snapshot);
|
||||
// }
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
81
src/meta/store/FileSession.h
Normal file
81
src/meta/store/FileSession.h
Normal file
@@ -0,0 +1,81 @@
|
||||
#pragma once
|
||||
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "common/app/ClientId.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "common/utils/Uuid.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
using kv::IReadOnlyTransaction;
|
||||
using kv::IReadWriteTransaction;
|
||||
|
||||
struct FileSession {
|
||||
SERDE_STRUCT_FIELD(inodeId, InodeId());
|
||||
SERDE_STRUCT_FIELD(clientId, ClientId::zero());
|
||||
SERDE_STRUCT_FIELD(sessionId, Uuid::zero());
|
||||
SERDE_STRUCT_FIELD(timestamp, UtcTime());
|
||||
SERDE_STRUCT_FIELD(payload, std::string()); // for placeholder
|
||||
|
||||
public:
|
||||
static std::string prefix(InodeId inodeId);
|
||||
static std::string packKey(InodeId inodeId, Uuid session);
|
||||
static Result<std::pair<InodeId, Uuid>> unpackByInodeKey(std::string_view key);
|
||||
|
||||
static FileSession create(InodeId inodeId, SessionInfo session, UtcTime timestamp = UtcClock::now()) {
|
||||
return {inodeId, session.client, session.session, timestamp};
|
||||
}
|
||||
static FileSession create(InodeId inodeId, ClientId clientId, Uuid sessionId, UtcTime timestamp = UtcClock::now()) {
|
||||
return {inodeId, clientId, sessionId, timestamp};
|
||||
}
|
||||
|
||||
static Result<FileSession> unpack(std::string_view key, std::string_view value);
|
||||
|
||||
static CoTryTask<std::optional<FileSession>> load(IReadOnlyTransaction &txn, InodeId inodeId, Uuid session);
|
||||
static CoTryTask<std::vector<FileSession>> list(IReadOnlyTransaction &txn,
|
||||
InodeId inodeId,
|
||||
bool snapshot,
|
||||
size_t limit = 0);
|
||||
|
||||
static CoTryTask<std::optional<FileSession>> snapshotCheckExists(IReadOnlyTransaction &txn, const InodeId inodeId);
|
||||
static CoTryTask<std::optional<FileSession>> checkExists(IReadWriteTransaction &txn, const InodeId inodeId);
|
||||
static CoTryTask<void> removeAll(IReadWriteTransaction &txn, InodeId inodeId);
|
||||
|
||||
static constexpr size_t kShard = 256;
|
||||
static_assert(kShard == (1 << 8));
|
||||
static CoTryTask<std::vector<FileSession>> scan(IReadOnlyTransaction &txn,
|
||||
size_t shard,
|
||||
std::optional<FileSession> prev);
|
||||
|
||||
CoTryTask<void> store(IReadWriteTransaction &txn) const;
|
||||
CoTryTask<void> remove(IReadWriteTransaction &txn) const;
|
||||
|
||||
// prune, store FileSessions need to be pruned under special InodeId(-1)
|
||||
static FileSession createPrune(ClientId clientId, Uuid sessionId) {
|
||||
return FileSession::create(InodeId(-1), clientId, sessionId);
|
||||
}
|
||||
|
||||
static CoTryTask<std::vector<FileSession>> listPrune(IReadOnlyTransaction &txn, size_t limit) {
|
||||
co_return co_await FileSession::list(txn, InodeId(-1), true, limit);
|
||||
}
|
||||
|
||||
// static CoTryTask<std::optional<FileSession>> load(IReadOnlyTransaction &txn, ClientId clientId, Uuid session);
|
||||
// static CoTryTask<std::vector<FileSession>> list(IReadOnlyTransaction &txn, Uuid clientId, bool snapshot);
|
||||
// static std::string prefix(Uuid clientId);
|
||||
// static std::string packKey(Uuid clientId, Uuid session);
|
||||
// static Result<std::pair<Uuid, Uuid>> unpackByClientKey(std::string_view key);
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
136
src/meta/store/Idempotent.h
Normal file
136
src/meta/store/Idempotent.h
Normal file
@@ -0,0 +1,136 @@
|
||||
#pragma once
|
||||
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/KeyPrefix.h"
|
||||
#include "common/serde/MessagePacket.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/Nameof.hpp"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/SerDeser.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "common/utils/Uuid.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
/** Store transaction result to ensure idempotency during retries. Currently used for remove operations.
|
||||
*/
|
||||
struct Idempotent {
|
||||
static constexpr auto keyPrefix = kv::KeyPrefix::MetaIdempotent;
|
||||
|
||||
template <typename T>
|
||||
struct Record {
|
||||
Record() requires(std::is_same_v<T, Void>) = default;
|
||||
explicit Record(const T &result)
|
||||
: result(result) {}
|
||||
|
||||
SERDE_STRUCT_FIELD(clientId, Uuid::zero());
|
||||
SERDE_STRUCT_FIELD(requestId, Uuid::zero());
|
||||
SERDE_STRUCT_FIELD(timestamp, UtcTime());
|
||||
SERDE_STRUCT_FIELD(result, serde::Payload<T>());
|
||||
|
||||
public:
|
||||
std::string packKey() const {
|
||||
// requestId + clientId to avoid hotspot
|
||||
XLOGF_IF(FATAL, clientId == Uuid::zero() || requestId == Uuid::zero(), "invalid uuid");
|
||||
return Serializer::serRawArgs(keyPrefix, requestId, clientId);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T, class ReqInfo>
|
||||
static CoTryTask<std::optional<Result<T>>> load(kv::IReadWriteTransaction &txn,
|
||||
const Uuid clientId,
|
||||
const Uuid requestId,
|
||||
const ReqInfo &req) {
|
||||
if (clientId == Uuid::zero() || requestId == Uuid::zero()) {
|
||||
XLOGF(CRITICAL, "Request invalid uuid {} {}", clientId, requestId);
|
||||
co_return makeError(StatusCode::kInvalidArg, "Invalid uuid");
|
||||
}
|
||||
|
||||
Record<Void> record;
|
||||
record.clientId = clientId;
|
||||
record.requestId = requestId;
|
||||
auto res = co_await txn.get(record.packKey());
|
||||
CO_RETURN_ON_ERROR(res);
|
||||
if (!res->has_value()) {
|
||||
co_return std::nullopt;
|
||||
}
|
||||
auto desRes = serde::deserialize(record, res->value());
|
||||
if (!desRes) {
|
||||
XLOGF(DFATAL, "IdempotentRecord deserialize failed, request {}, error {}", req, desRes.error());
|
||||
co_return makeError(StatusCode::kDataCorruption, "IdempotentRecord des failed");
|
||||
}
|
||||
if (record.clientId != clientId || record.requestId != requestId) {
|
||||
XLOGF(DFATAL, "IdempotentRecord mismatch, request {}, record {}", req, record);
|
||||
co_return makeError(MetaCode::kInconsistent, "IdempotentRecord uuid mismatch");
|
||||
}
|
||||
|
||||
Result<T> result = makeError(StatusCode::kUnknown);
|
||||
auto desResult = serde::deserialize(result, record.result);
|
||||
if (!desResult) {
|
||||
XLOGF(DFATAL, "IdempotentRecord deserialize result failed, request {}, error {}", req, desResult.error());
|
||||
co_return makeError(StatusCode::kDataCorruption, "IdempotentRecord deserialize result failed");
|
||||
}
|
||||
XLOGF(CRITICAL, "Duplicated request {}, result {}, prev {}, now {}", req, result, record.timestamp, UtcTime::now());
|
||||
co_return std::optional(result);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static CoTryTask<Void> store(kv::IReadWriteTransaction &txn,
|
||||
const Uuid clientId,
|
||||
const Uuid requestId,
|
||||
const Result<T> &result) {
|
||||
Record<Result<T>> record(result);
|
||||
record.clientId = clientId;
|
||||
record.requestId = requestId;
|
||||
record.timestamp = UtcClock::now();
|
||||
|
||||
auto key = record.packKey();
|
||||
auto value = serde::serialize(record);
|
||||
co_return co_await txn.set(key, value);
|
||||
}
|
||||
|
||||
static CoTryTask<std::pair<std::string, bool>> clean(kv::IReadWriteTransaction &txn,
|
||||
std::optional<std::string> prev,
|
||||
Duration expire,
|
||||
size_t limit,
|
||||
size_t &total,
|
||||
size_t &cleaned) {
|
||||
auto now = UtcClock::now();
|
||||
auto prefix = Serializer::serRawArgs(keyPrefix);
|
||||
auto begin = prev.value_or(prefix);
|
||||
XLOGF_IF(FATAL, begin < prefix, "{} < {}", begin, prefix);
|
||||
auto end = kv::TransactionHelper::prefixListEndKey(prefix);
|
||||
kv::IReadOnlyTransaction::KeySelector selBegin{begin, false};
|
||||
kv::IReadOnlyTransaction::KeySelector selEnd{end, false};
|
||||
auto res = co_await txn.getRange(selBegin, selEnd, limit);
|
||||
CO_RETURN_ON_ERROR(res);
|
||||
|
||||
total = res->kvs.size();
|
||||
cleaned = 0;
|
||||
for (const auto &kv : res->kvs) {
|
||||
Record<Void> record;
|
||||
auto des = serde::deserialize(record, kv.value);
|
||||
if (!des) {
|
||||
XLOGF(CRITICAL, "IdempotentRecord deserialize failed {}", des.error());
|
||||
continue;
|
||||
}
|
||||
if (record.timestamp + expire < now) {
|
||||
cleaned++;
|
||||
CO_RETURN_ON_ERROR(co_await txn.clear(kv.key));
|
||||
}
|
||||
}
|
||||
|
||||
auto nextPrev = res->kvs.empty() ? begin : res->kvs.back().key;
|
||||
co_return std::pair<std::string, bool>{nextPrev, res->hasMore};
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
226
src/meta/store/Inode.cc
Normal file
226
src/meta/store/Inode.cc
Normal file
@@ -0,0 +1,226 @@
|
||||
#include "meta/store/Inode.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/futures/Future.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <linux/fs.h>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/KeyPrefix.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/FaultInjection.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/SerDeser.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "meta/store/BatchContext.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/** Inode */
|
||||
// Build an Inode from a raw key/value pair: the id comes from the key, the data from the value.
// Returns kDataCorruption when the value cannot be deserialized; key errors are propagated.
Result<Inode> Inode::newUnpacked(std::string_view key, std::string_view value) {
  Inode unpacked;
  RETURN_ON_ERROR(unpacked.unpackKey(key));

  auto parsed = serde::deserialize(unpacked.data(), value);
  if (parsed.hasError()) {
    XLOGF(CRITICAL, "Failed to deserialize inode value {}, data corruption!", parsed.error());
    return makeError(StatusCode::kDataCorruption);
  }
  return std::move(unpacked);
}
|
||||
|
||||
// Key of an inode: the Inode key prefix followed by the packed inode id.
std::string Inode::packKey(InodeId id) {
  auto packedId = id.packKey();
  static constexpr auto kInodePrefix = kv::KeyPrefix::Inode;
  return Serializer::serRawArgs(kInodePrefix, packedId);
}
|
||||
|
||||
// Key of this inode, built from its own id.
std::string Inode::packKey() const {
  return Inode::packKey(id);
}
|
||||
|
||||
// Decode `key` (key prefix + packed inode id) and store the decoded id in this inode.
// Returns kDataCorruption when the key cannot be decoded or does not carry the Inode prefix.
Result<Void> Inode::unpackKey(std::string_view key) {
  kv::KeyPrefix prefix;
  InodeId::Key inodeId;
  if (auto result = Deserializer::deserRawArgs(key, prefix, inodeId); result.hasError()) {
    XLOGF(CRITICAL, "Failed to deserialize inode key {}, data corruption!", result.error());
    return makeError(StatusCode::kDataCorruption);
  }
  // Checked at runtime rather than with assert(): with NDEBUG an assert vanishes and a key that
  // belongs to another prefix would be decoded into a bogus inode id. DFATAL is used like
  // elsewhere in this file for "should never happen" conditions.
  if (UNLIKELY(!(prefix == kv::KeyPrefix::Inode))) {
    XLOGF(DFATAL, "Inode key has unexpected prefix, data corruption!");
    return makeError(StatusCode::kDataCorruption);
  }
  id = InodeId::unpackKey(inodeId);

  return Void{};
}
|
||||
|
||||
template <const bool SNAPSHOT>
|
||||
CoTryTask<std::optional<Inode>> Inode::loadImpl(IReadOnlyTransaction &txn, InodeId id) {
|
||||
auto func = SNAPSHOT ? &IReadOnlyTransaction::snapshotGet : &IReadOnlyTransaction::get;
|
||||
auto result = co_await (txn.*func)(packKey(id));
|
||||
if (result.hasError()) {
|
||||
XLOGF(ERR, "Failed to load inode {}, error {}", id, result.error());
|
||||
CO_RETURN_ERROR(result);
|
||||
}
|
||||
if (auto &value = *result; value.has_value()) {
|
||||
Inode inode;
|
||||
inode.id = id;
|
||||
#ifndef NDEBUG
|
||||
inode.snapshotLoaded_ = SNAPSHOT;
|
||||
#endif
|
||||
if (auto des = serde::deserialize(inode.data(), *value); des.hasError()) {
|
||||
XLOGF(CRITICAL, "Failed to deserialize inode {}, {}, data corruption!!", inode.id, des.error());
|
||||
co_return makeError(StatusCode::kDataCorruption, fmt::format("deserialize inode {} failed", id));
|
||||
}
|
||||
co_return std::move(inode);
|
||||
} else {
|
||||
co_return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
// Snapshot-read an inode. When a BatchContext is active the load goes through its per-inode guard:
// the caller that is told to load publishes its result via guard.set(), the others await the guard.
CoTryTask<std::optional<Inode>> Inode::snapshotLoad(IReadOnlyTransaction &txn, InodeId id) {
  auto batch = BatchContext::get();
  if (!batch) {
    co_return co_await loadImpl<true>(txn, id);
  }

  auto guard = batch->loadInode(id);
  if (!guard.needLoad) {
    co_return co_await guard.coAwait();
  }

  auto loaded = co_await loadImpl<true>(txn, id);
  guard.set(loaded);
  co_return std::move(loaded);
}
|
||||
|
||||
// Read an inode with a regular (non-snapshot) get.
CoTryTask<std::optional<Inode>> Inode::load(IReadOnlyTransaction &txn, InodeId id) {
  auto loaded = co_await loadImpl<false>(txn, id);
  co_return loaded;
}
|
||||
|
||||
// Validate this inode and write it under its key.
// Tree roots must be directories named "" or "/" whose acl (iflags aside) equals the expected root
// acl; other directories must not be named ".", ".." or contain '/'; files need a valid layout.
CoTryTask<void> Inode::store(IReadWriteTransaction &txn) const {
  static const std::map<InodeId, Acl> treeRoots{{InodeId::root(), Acl::root()}, {InodeId::gcRoot(), Acl::gcRoot()}};
  assert(!snapshotLoaded_);

  if (const auto rootIt = treeRoots.find(id); rootIt != treeRoots.end()) {
    const bool validRoot = isDirectory() && (asDirectory().name.empty() || asDirectory().name == "/");
    if (!validRoot) {
      XLOGF(DFATAL, "Store invalid root inode, {}", *this);
      co_return makeError(MetaCode::kFoundBug, fmt::format("Store invalid special inode {}", *this));
    }
    // iflags may differ from the expected acl, everything else may not.
    auto expectedAcl = rootIt->second;
    expectedAcl.iflags = acl.iflags;
    if (acl != expectedAcl) {
      XLOGF(DFATAL, "Try change root inode {} acl to {}", id, acl);
      co_return makeError(MetaCode::kNoPermission, fmt::format("try change root {} acl to {}", id, acl));
    }
  } else if (isDirectory()) {
    auto &name = asDirectory().name;
    const bool hasSlash = std::find(name.begin(), name.end(), '/') != name.end();
    if (UNLIKELY(name == "." || name == ".." || hasSlash)) {
      XLOGF(DFATAL, "DirEntry name {} is invalid, should never happen!!!", name);
      co_return makeError(MetaCode::kFoundBug, fmt::format("Directory {} invalid DirEntry name {}", id, name));
    }
  } else if (isFile()) {
    auto valid = asFile().layout.valid(false /*allowEmpty*/);
    if (valid.hasError()) {
      XLOGF(DFATAL, "File {} has a invalid layout {}, error {}", id, asFile().layout, valid.error());
      co_return makeError(MetaCode::kFoundBug,
                          fmt::format("File {} invalid layout {}, {}", id, asFile().layout, valid.error()));
    }
  }

  auto packedKey = packKey();
  auto packedValue = serde::serialize(data());
  auto written = co_await txn.set(packedKey, packedValue);
  if (written.hasError()) {
    XLOGF(ERR, "Failed to store inode {}, error {}", id, written.error());
    co_return written;
  }
  XLOGF(DBG, "Store inode {}", id);
  co_return Void{};
}
|
||||
|
||||
// Clear this inode's key. Tree roots and inodes flagged FS_IMMUTABLE_FL are refused with kFoundBug.
CoTryTask<void> Inode::remove(IReadWriteTransaction &txn) const {
  assert(!snapshotLoaded_);

  const bool treeRoot = id.isTreeRoot();
  if (UNLIKELY(treeRoot)) {
    XLOGF(DFATAL, "Don't allow remove tree root {}!", id);
    co_return makeError(MetaCode::kFoundBug, "Try remove tree root");
  }

  const auto immutableBit = acl.iflags & FS_IMMUTABLE_FL;
  if (UNLIKELY(immutableBit)) {
    XLOGF(DFATAL, "Try remove inode {} with FS_IMMUTABLE_FL", id);
    co_return makeError(MetaCode::kFoundBug, "Try remove inode with FS_IMMUTABLE_FL");
  }

  XLOGF(DBG, "Remove inode {}", id);
  auto cleared = co_await txn.clear(packKey());
  co_return cleared;
}
|
||||
|
||||
// Find the directory entry in the parent directory that refers to this directory inode.
// With a recorded name the entry is loaded directly; with an empty name the parent is listed page
// by page and searched for an entry whose id equals this inode's id.
// Returns kNotDirectory for non-directories and kNotFound when no entry with this id exists.
CoTryTask<meta::DirEntry> Inode::snapshotLoadDirEntry(IReadOnlyTransaction &txn) const {
  if (!isDirectory()) {
    co_return makeError(MetaCode::kNotDirectory);
  }

  auto parent = asDirectory().parent;
  auto name = asDirectory().name;
  std::optional<DirEntry> entry;
  if (!name.empty()) {
    auto result = co_await DirEntry::snapshotLoad(txn, parent, name);
    CO_RETURN_ON_ERROR(result);
    entry = std::move(*result);
  } else {
    std::string prev;
    while (true) {
      auto entries = co_await DirEntryList::snapshotLoad(txn, parent, prev, -1, false);
      CO_RETURN_ON_ERROR(entries);
      for (auto &item : entries->entries) {
        if (item.id == id) {
          entry = std::move(item);
          break;
        }
      }
      // Stop as soon as the entry is found: the matched item has been moved from, so its name
      // must not be used as the next cursor, and further pages cannot change the answer.
      if (entry.has_value()) break;
      // Also stop when there is nothing more to read, or when the page is empty and the cursor
      // therefore cannot advance (the same request would be repeated forever).
      if (!entries->more || entries->entries.empty()) break;
      prev = entries->entries.rbegin()->name;
    }
  }

  if (!entry.has_value()) {
    XLOGF(WARN, "DirEntry of directory {} not found, parent {}, path {}, maybe deleted!", id, parent, name);
    co_return makeError(MetaCode::kNotFound);
  } else if (entry->id != id) {
    XLOGF(WARN, "InodeId of DirEntry {} != {}, maybe deleted", *entry, id);
    co_return makeError(MetaCode::kNotFound);
  }

  co_return *entry;
}
|
||||
|
||||
// Walk from `parent` upwards, appending every loaded ancestor inode to `ancestors`, until an inode
// whose parent is itself is reached. Fails with kInconsistent when an id is visited twice and with
// kNotDirectory when an ancestor is not a directory (that ancestor has already been appended).
CoTryTask<Void> Inode::loadAncestors(IReadWriteTransaction &txn, std::vector<Inode> &ancestors, InodeId parent) {
  std::set<InodeId> visited;
  auto cursor = parent;
  FAULT_INJECTION_SET_FACTOR(4);
  for (;;) {
    // insert() reports whether the id was new; a repeated id means the tree contains a loop.
    const bool firstVisit = visited.insert(cursor).second;
    if (UNLIKELY(!firstVisit)) {
      XLOGF(DFATAL, "Inode found duplicated ancestor, parent {}, duplicated {}", parent, cursor);
      co_return makeError(MetaCode::kInconsistent, "directory tree contains loop");
    }

    // NOTE: add dst's ancestors inode into read conflict set
    auto ancestor = (co_await Inode::load(txn, cursor)).then(checkMetaFound<Inode>);
    CO_RETURN_ON_ERROR(ancestor);
    ancestors.push_back(*ancestor);
    if (UNLIKELY(!ancestor->isDirectory())) {
      XLOGF(DFATAL, "Entry {}, Inode {} is not directory", cursor, *ancestor);
      co_return makeError(MetaCode::kNotDirectory);
    }

    const auto &dir = ancestor->asDirectory();
    if (dir.parent == ancestor->id) {
      co_return Void{};
    }
    cursor = dir.parent;
  }
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
84
src/meta/store/Inode.h
Normal file
84
src/meta/store/Inode.h
Normal file
@@ -0,0 +1,84 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Path.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "common/utils/Uuid.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
using hf3fs::kv::IReadOnlyTransaction;
|
||||
using hf3fs::kv::IReadWriteTransaction;
|
||||
|
||||
class Inode : public meta::Inode {
|
||||
public:
|
||||
using Base = meta::Inode;
|
||||
using Base::Base;
|
||||
|
||||
Inode(Base base)
|
||||
: Base(std::move(base)) {}
|
||||
Inode(InodeId id, Acl acl, UtcTime time, std::variant<File, Directory, Symlink> type)
|
||||
: Base{id, InodeData{type, acl, 1, time, time, time}} {}
|
||||
|
||||
static Inode newFile(InodeId id, Acl acl, Layout layout, UtcTime time) { return Inode(id, acl, time, File(layout)); }
|
||||
|
||||
static Inode newDirectory(InodeId id, InodeId parent, std::string name, Acl acl, Layout layout, UtcTime time) {
|
||||
return Inode(id, acl, time, Directory{parent, std::move(layout), std::move(name)});
|
||||
}
|
||||
|
||||
static Inode newSymlink(InodeId id, Path target, Uid uid, Gid gid, UtcTime time) {
|
||||
static constexpr Permission perm{0777}; // permission of symlink is never used, and won't changed
|
||||
return Inode(id, Acl(uid, gid, perm), time, Symlink{std::move(target)});
|
||||
}
|
||||
|
||||
/** key format: kInodePrefx + InodeId.key */
|
||||
static std::string packKey(InodeId id);
|
||||
std::string packKey() const;
|
||||
Result<Void> unpackKey(std::string_view key);
|
||||
|
||||
static Result<Inode> newUnpacked(std::string_view key, std::string_view value);
|
||||
|
||||
// The difference of `snapshotLoad` and `load` is the former won't add key of inode into read conflict set.
|
||||
static CoTryTask<std::optional<Inode>> snapshotLoad(IReadOnlyTransaction &txn, InodeId id);
|
||||
static CoTryTask<std::optional<Inode>> load(IReadOnlyTransaction &txn, InodeId id);
|
||||
|
||||
CoTryTask<void> addIntoReadConflict(IReadWriteTransaction &txn) {
|
||||
#ifndef NDEBUG
|
||||
snapshotLoaded_ = false;
|
||||
#endif
|
||||
co_return co_await txn.addReadConflict(packKey());
|
||||
}
|
||||
|
||||
CoTryTask<void> store(IReadWriteTransaction &txn) const;
|
||||
/** Remove this inode */
|
||||
CoTryTask<void> remove(IReadWriteTransaction &txn) const;
|
||||
|
||||
CoTryTask<DirEntry> snapshotLoadDirEntry(IReadOnlyTransaction &txn) const;
|
||||
|
||||
static CoTryTask<Void> loadAncestors(IReadWriteTransaction &txn, std::vector<Inode> &ancestors, InodeId parent);
|
||||
|
||||
private:
|
||||
template <const bool SNAPSHOT>
|
||||
static CoTryTask<std::optional<Inode>> loadImpl(IReadOnlyTransaction &txn, InodeId id);
|
||||
|
||||
#ifndef NDEBUG
|
||||
mutable bool snapshotLoaded_ = false;
|
||||
#endif
|
||||
};
|
||||
|
||||
// Compile-time check that the server-side Inode satisfies serde::SerializableToJson.
static_assert(serde::SerializableToJson<Inode>);
|
||||
} // namespace hf3fs::meta::server
|
||||
93
src/meta/store/MetaStore.cc
Normal file
93
src/meta/store/MetaStore.cc
Normal file
@@ -0,0 +1,93 @@
|
||||
#include "meta/store/MetaStore.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "common/app/NodeId.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "fbs/mgmtd/ChainRef.h"
|
||||
#include "meta/components/ChainAllocator.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/Operation.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
// Operation that creates the root and GC-root directory inodes when they do not exist yet.
class InitFsOp : public IOperation<Void> {
 public:
  InitFsOp(ChainAllocator &chainAlloc, Layout rootLayout)
      : chainAlloc_(chainAlloc),
        rootLayout_(std::move(rootLayout)) {}

  bool isReadOnly() final { return false; }

  // Validates the root layout, then stores whichever of the two tree roots is missing.
  CoTryTask<Void> run(IReadWriteTransaction &txn) final {
    XLOGF(INFO, "MetaStore::initFs");
    auto layoutCheck = co_await chainAlloc_.checkLayoutValid(rootLayout_);
    if (layoutCheck.hasError()) {
      XLOGF(ERR, "RootLayout is not valid, {}", layoutCheck.error());
      co_return makeError(std::move(layoutCheck.error()));
    }

    // check tree roots exist
    auto hasValue = [](auto &loaded) { return loaded.has_value(); };
    auto rootExists = (co_await Inode::load(txn, InodeId::root())).then(hasValue);
    auto gcRootExists = (co_await Inode::load(txn, InodeId::gcRoot())).then(hasValue);
    CO_RETURN_ON_ERROR(rootExists);
    CO_RETURN_ON_ERROR(gcRootExists);

    if (!*rootExists) {
      // no root, need create a root
      // root Inode's parent is itself, this simplify path resolution: eg /../../../a -> /a
      const auto createdAt = UtcClock::now().castGranularity(1_ms);
      Inode rootInode =
          Inode::newDirectory(InodeId::root(), InodeId::root(), "/", Acl::root(), rootLayout_, createdAt);
      CO_RETURN_ON_ERROR(co_await rootInode.store(txn));
    }
    if (!*gcRootExists) {
      // no GC root, need create a GC root
      const auto createdAt = UtcClock::now().castGranularity(1_ms);
      Inode gcRootInode = Inode::newDirectory(InodeId::gcRoot(),
                                              InodeId::gcRoot(),
                                              "/",
                                              Acl::gcRoot(),
                                              Layout() /* Invalid layout */,
                                              createdAt);
      CO_RETURN_ON_ERROR(co_await gcRootInode.store(txn));
    }

    co_return Void();
  }

  // NOTE: these functions won't be called in InitCluster.cc
  void retry(const Status &) final {}
  void finish(const Result<Void> &) final {}

 private:
  ChainAllocator &chainAlloc_;
  Layout rootLayout_;
};
|
||||
|
||||
// Create the operation that initialises the file system's tree roots.
// `rootLayout` is a by-value sink parameter, so it is moved into the operation instead of copied.
MetaStore::OpPtr<Void> MetaStore::initFileSystem(ChainAllocator &chainAlloc, Layout rootLayout) {
  return std::make_unique<InitFsOp>(chainAlloc, std::move(rootLayout));
}
|
||||
|
||||
// Read-only operation that touches no data and answers with a default-constructed TestRpcRsp.
class BenchRpcOp : public ReadOnlyOperation<TestRpcRsp> {
 public:
  BenchRpcOp(MetaStore &store)
      : ReadOnlyOperation<TestRpcRsp>(store) {}

  CoTryTask<TestRpcRsp> run(IReadOnlyTransaction &) override {
    co_return TestRpcRsp{};
  }
};
|
||||
|
||||
// The request content is ignored; every call gets a fresh BenchRpcOp bound to this store.
MetaStore::OpPtr<TestRpcRsp> MetaStore::testRpc(const TestRpcReq &) {
  return std::make_unique<BenchRpcOp>(*this);
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
158
src/meta/store/MetaStore.h
Normal file
158
src/meta/store/MetaStore.h
Normal file
@@ -0,0 +1,158 @@
|
||||
#pragma once
|
||||
|
||||
#include <boost/core/ignore_unused.hpp>
|
||||
#include <fcntl.h>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/lang/Bits.h>
|
||||
#include <gtest/gtest_prod.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <queue>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "client/mgmtd/ICommonMgmtdClient.h"
|
||||
#include "client/storage/StorageClient.h"
|
||||
#include "common/kv/IKVEngine.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/WithTransaction.h"
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/monitor/Sample.h"
|
||||
#include "common/utils/ConfigBase.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Path.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/Status.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "meta/base/Config.h"
|
||||
#include "meta/components/AclCache.h"
|
||||
#include "meta/components/ChainAllocator.h"
|
||||
#include "meta/components/FileHelper.h"
|
||||
#include "meta/components/GcManager.h"
|
||||
#include "meta/components/InodeIdAllocator.h"
|
||||
#include "meta/components/SessionManager.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/PathResolve.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
using hf3fs::kv::IReadOnlyTransaction;
|
||||
using hf3fs::kv::IReadWriteTransaction;
|
||||
|
||||
template <typename Rsp>
|
||||
class IOperation {
|
||||
public:
|
||||
using RspT = Rsp;
|
||||
|
||||
virtual ~IOperation() = default;
|
||||
|
||||
virtual bool isReadOnly() = 0;
|
||||
virtual bool retryMaybeCommitted() { return true; }
|
||||
|
||||
virtual bool needIdempotent(Uuid &clientId, Uuid &requestId) const {
|
||||
boost::ignore_unused(clientId, requestId);
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual std::string_view name() const { return "other"; }
|
||||
virtual flat::Uid user() const { return flat::Uid(-1); }
|
||||
|
||||
virtual CoTryTask<Rsp> run(IReadWriteTransaction &) = 0;
|
||||
|
||||
virtual void retry(const Status &) = 0;
|
||||
virtual void finish(const Result<Rsp> &) = 0;
|
||||
|
||||
CoTryTask<Rsp> operator()(IReadWriteTransaction &txn) { co_return co_await run(txn); }
|
||||
};
|
||||
|
||||
class MetaStore {
|
||||
public:
|
||||
MetaStore(const Config &config,
|
||||
analytics::StructuredTraceLog<MetaEventTrace> &metaEventTraceLog,
|
||||
std::shared_ptr<Distributor> distributor,
|
||||
std::shared_ptr<InodeIdAllocator> inodeAlloc,
|
||||
std::shared_ptr<ChainAllocator> chainAlloc,
|
||||
std::shared_ptr<FileHelper> fileHelper,
|
||||
std::shared_ptr<SessionManager> sessionManager,
|
||||
std::shared_ptr<GcManager> gcManager)
|
||||
: config_(config),
|
||||
metaEventTraceLog_(metaEventTraceLog),
|
||||
distributor_(distributor),
|
||||
inodeAlloc_(inodeAlloc),
|
||||
chainAlloc_(chainAlloc),
|
||||
fileHelper_(fileHelper),
|
||||
sessionManager_(sessionManager),
|
||||
gcManager_(gcManager),
|
||||
aclCache_(2 << 20 /* 2m acl */) {}
|
||||
|
||||
auto &getEventTraceLog() { return metaEventTraceLog_; }
|
||||
|
||||
template <typename Rsp>
|
||||
using Op = IOperation<Rsp>;
|
||||
|
||||
template <typename Rsp>
|
||||
using OpPtr = std::unique_ptr<IOperation<Rsp>>;
|
||||
|
||||
static OpPtr<Void> initFileSystem(ChainAllocator &chainAlloc, Layout rootLayout);
|
||||
|
||||
OpPtr<Void> initFs(Layout rootLayout) { return MetaStore::initFileSystem(*chainAlloc_, rootLayout); }
|
||||
|
||||
OpPtr<StatFsRsp> statFs(const StatFsReq &req);
|
||||
|
||||
OpPtr<StatRsp> stat(const StatReq &req);
|
||||
|
||||
OpPtr<BatchStatRsp> batchStat(const BatchStatReq &req);
|
||||
|
||||
OpPtr<BatchStatByPathRsp> batchStatByPath(const BatchStatByPathReq &req);
|
||||
|
||||
OpPtr<GetRealPathRsp> getRealPath(const GetRealPathReq &req);
|
||||
|
||||
OpPtr<OpenRsp> open(OpenReq &req);
|
||||
|
||||
OpPtr<CreateRsp> tryOpen(CreateReq &req);
|
||||
|
||||
OpPtr<MkdirsRsp> mkdirs(const MkdirsReq &req);
|
||||
|
||||
OpPtr<SymlinkRsp> symlink(const SymlinkReq &req);
|
||||
|
||||
OpPtr<RemoveRsp> remove(const RemoveReq &req);
|
||||
|
||||
OpPtr<RenameRsp> rename(const RenameReq &req);
|
||||
|
||||
OpPtr<ListRsp> list(const ListReq &req);
|
||||
|
||||
OpPtr<SyncRsp> sync(const SyncReq &req);
|
||||
|
||||
OpPtr<HardLinkRsp> hardLink(const HardLinkReq &req);
|
||||
|
||||
OpPtr<SetAttrRsp> setAttr(const SetAttrReq &req);
|
||||
|
||||
OpPtr<PruneSessionRsp> pruneSession(const PruneSessionReq &req);
|
||||
|
||||
OpPtr<TestRpcRsp> testRpc(const TestRpcReq &req);
|
||||
|
||||
OpPtr<LockDirectoryRsp> lockDirectory(const LockDirectoryReq &req);
|
||||
|
||||
private:
|
||||
template <typename>
|
||||
FRIEND_TEST(TestRemove, GC);
|
||||
|
||||
template <typename Rsp>
|
||||
friend class Operation;
|
||||
|
||||
const Config &config_;
|
||||
analytics::StructuredTraceLog<MetaEventTrace> &metaEventTraceLog_;
|
||||
std::shared_ptr<Distributor> distributor_;
|
||||
std::shared_ptr<InodeIdAllocator> inodeAlloc_;
|
||||
std::shared_ptr<ChainAllocator> chainAlloc_;
|
||||
std::shared_ptr<FileHelper> fileHelper_;
|
||||
std::shared_ptr<SessionManager> sessionManager_;
|
||||
std::shared_ptr<GcManager> gcManager_;
|
||||
AclCache aclCache_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
265
src/meta/store/Operation.h
Normal file
265
src/meta/store/Operation.h
Normal file
@@ -0,0 +1,265 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <folly/ScopeGuard.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/monitor/Sample.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "fbs/meta/Utils.h"
|
||||
#include "fdb/FDBRetryStrategy.h"
|
||||
#include "fdb/FDBTransaction.h"
|
||||
#include "meta/components/AclCache.h"
|
||||
#include "meta/components/GcManager.h"
|
||||
#include "meta/components/SessionManager.h"
|
||||
#include "meta/event/Event.h"
|
||||
#include "meta/store/Idempotent.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
|
||||
// Defines the name()/user() overrides of an operation from its request object `reqName`:
// name() is the rpc name of the request, user() is the uid found in `reqName.user`.
#define OPERATION_TAGS(reqName)              \
  std::string_view name() const override {   \
    return MetaSerde<>::getRpcName(reqName); \
  }                                          \
  flat::Uid user() const override {          \
    return reqName.user.uid;                 \
  }
|
||||
|
||||
// Coroutine-only guard: validates `reqName` via its valid() method and, on failure, logs a warning
// and returns the validation error through CO_RETURN_ERROR.
#define CHECK_REQUEST(reqName)                                              \
  do {                                                                      \
    auto result = reqName.valid();                                          \
    if (UNLIKELY(result.hasError())) {                                      \
      auto rpcName = MetaSerde<>::getRpcName(reqName);                      \
      XLOGF(WARN, "{} get invalid req, error {}", rpcName, result.error()); \
      CO_RETURN_ERROR(result);                                              \
    }                                                                       \
  } while (0)
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
template <typename Rsp>
|
||||
class Operation : public IOperation<Rsp> {
|
||||
public:
|
||||
Operation(MetaStore &meta)
|
||||
: meta_(meta) {}
|
||||
|
||||
bool isReadOnly() override { return false; }
|
||||
CoTryTask<Rsp> run(IReadWriteTransaction &) override = 0;
|
||||
|
||||
void retry(const Status &) override { clearEvents(); }
|
||||
|
||||
void finish(const Result<Rsp> &result) override {
|
||||
if (!result.hasError()) {
|
||||
// success
|
||||
for (const auto &event : events_) {
|
||||
event.log();
|
||||
}
|
||||
|
||||
for (const auto &trace : traces_) {
|
||||
auto &traceLog = meta_.getEventTraceLog();
|
||||
traceLog.append(trace);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
const Config &config() const { return meta_.config_; }
|
||||
|
||||
InodeIdAllocator &inodeIdAlloc() { return *meta_.inodeAlloc_; }
|
||||
ChainAllocator &chainAlloc() { return *meta_.chainAlloc_; }
|
||||
FileHelper &fileHelper() { return *meta_.fileHelper_; }
|
||||
SessionManager &sessionManager() { return *meta_.sessionManager_; }
|
||||
GcManager &gcManager() { return *meta_.gcManager_; }
|
||||
AclCache &aclCache() { return meta_.aclCache_; }
|
||||
Distributor &distributor() { return *meta_.distributor_; }
|
||||
|
||||
UtcTime now() const { return UtcClock::now().castGranularity(config().time_granularity()); }
|
||||
|
||||
PathResolveOp resolve(IReadOnlyTransaction &txn, const UserInfo &user, Path *path = nullptr) {
|
||||
return PathResolveOp(txn,
|
||||
aclCache(),
|
||||
user,
|
||||
path,
|
||||
config().max_symlink_count(),
|
||||
config().max_symlink_depth(),
|
||||
config().acl_cache_time());
|
||||
}
|
||||
|
||||
CoTryTask<InodeId> allocateInodeId(IReadWriteTransaction &txn, bool newChunkEngine) {
|
||||
auto newId = co_await inodeIdAlloc().allocate();
|
||||
CO_RETURN_ON_ERROR(newId);
|
||||
|
||||
if (newChunkEngine) {
|
||||
newId = InodeId::withNewChunkEngine(*newId);
|
||||
}
|
||||
|
||||
if (config().inodeId_check_unique()) {
|
||||
auto loadResult = co_await Inode::load(txn, *newId);
|
||||
CO_RETURN_ON_ERROR(loadResult);
|
||||
if (loadResult->has_value()) {
|
||||
XLOGF_IF(FATAL,
|
||||
config().inodeId_abort_on_duplicate(),
|
||||
"InodeIdAllocator get duplicated InodeId {}",
|
||||
newId.value());
|
||||
XLOGF(DFATAL, "InodeIdAllocator get duplicated InodeId {}", newId.value());
|
||||
co_return makeError(MetaCode::kInodeIdAllocFailed);
|
||||
}
|
||||
} else {
|
||||
XLOGF_EVERY_MS(WARN, (300 * 1000), "inodeId_check_unique is disabled");
|
||||
}
|
||||
|
||||
co_return newId;
|
||||
}
|
||||
|
||||
void clearEvents() { events_.clear(); }
|
||||
Event &addEvent(Event::Type type) {
|
||||
events_.emplace_back(type);
|
||||
return *events_.rbegin();
|
||||
}
|
||||
void addEvent(Event event) { events_.emplace_back(std::move(event)); }
|
||||
|
||||
void addTrace(MetaEventTrace &&trace) { traces_.emplace_back(std::move(trace)); }
|
||||
|
||||
MetaStore &meta_;
|
||||
std::vector<Event> events_;
|
||||
std::vector<MetaEventTrace> traces_;
|
||||
};
|
||||
|
||||
template <typename Rsp>
|
||||
class ReadOnlyOperation : public Operation<Rsp> {
|
||||
public:
|
||||
ReadOnlyOperation(MetaStore &meta)
|
||||
: Operation<Rsp>(meta) {}
|
||||
|
||||
bool isReadOnly() final { return true; }
|
||||
|
||||
virtual CoTryTask<Rsp> run(IReadOnlyTransaction &) = 0;
|
||||
CoTryTask<Rsp> run(IReadWriteTransaction &txn) final {
|
||||
co_return co_await run(static_cast<IReadOnlyTransaction &>(txn));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Rsp, typename ReqInfo>
|
||||
class OperationDriver {
|
||||
public:
|
||||
OperationDriver(MetaStore::Op<Rsp> &operation, const ReqInfo &req, std::optional<SteadyTime> deadline = std::nullopt)
|
||||
: operation_(operation),
|
||||
req_(req),
|
||||
deadline_(deadline) {}
|
||||
|
||||
CoTryTask<Rsp> run(std::unique_ptr<kv::IReadWriteTransaction> txn,
|
||||
kv::FDBRetryStrategy::Config config,
|
||||
bool readonly,
|
||||
bool enableGrvCache) {
|
||||
config.retryMaybeCommitted = operation_.retryMaybeCommitted();
|
||||
kv::FDBRetryStrategy strategy(config);
|
||||
CO_RETURN_ON_ERROR(strategy.init(txn.get()));
|
||||
|
||||
OperationRecorder::Guard recorder(OperationRecorder::server(), operation_.name(), operation_.user());
|
||||
|
||||
if (readonly && !operation_.isReadOnly()) {
|
||||
co_return makeError(StatusCode::kReadOnlyMode, "FileSystem is in readonly mode.");
|
||||
}
|
||||
|
||||
auto grvCache = operation_.isReadOnly() && enableGrvCache;
|
||||
if (grvCache && dynamic_cast<kv::FDBTransaction *>(txn.get())) {
|
||||
auto fdbTxn = dynamic_cast<kv::FDBTransaction *>(txn.get());
|
||||
CO_RETURN_ON_ERROR(fdbTxn->setOption(FDBTransactionOption::FDB_TR_OPTION_USE_GRV_CACHE, {}));
|
||||
}
|
||||
|
||||
Result<Rsp> result = makeError(MetaCode::kOperationTimeout);
|
||||
auto duplicate = false;
|
||||
while (true) {
|
||||
// check timeout
|
||||
if (deadline_ && deadline_.value() <= SteadyClock::now()) {
|
||||
XLOGF(ERR, "Request {} timeout, return error {}", describe(), result);
|
||||
break;
|
||||
}
|
||||
// run operation
|
||||
result = co_await runAndCommit(*txn, operation_, duplicate);
|
||||
if (ErrorHandling::success(result)) {
|
||||
break;
|
||||
}
|
||||
// retry
|
||||
XLOGF(WARN, "Request {} failed, error {}", describe(), result.error());
|
||||
operation_.retry(result.error());
|
||||
auto retry = co_await strategy.onError(txn.get(), result.error());
|
||||
if (retry.hasError()) {
|
||||
result = makeError(retry.error());
|
||||
break;
|
||||
}
|
||||
recorder.retry()++;
|
||||
}
|
||||
|
||||
if (result.hasError() && result.error().code() == StatusCode::kOK) {
|
||||
XLOGF(DFATAL, "Has error but error code is kOK, {}, {}", describe(), result);
|
||||
result = makeError(MetaCode::kFoundBug);
|
||||
}
|
||||
|
||||
recorder.finish(result, duplicate);
|
||||
operation_.finish(result);
|
||||
co_return result;
|
||||
}
|
||||
|
||||
private:
|
||||
#define IDEMPOTENT_CHECK() \
|
||||
do { \
|
||||
auto idemCheck = co_await Idempotent::load<Rsp>(txn, clientId, requestId, req_); \
|
||||
CO_RETURN_ON_ERROR(idemCheck); \
|
||||
duplicate = idemCheck->has_value(); \
|
||||
if (duplicate) { \
|
||||
co_return idemCheck->value(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
template <typename Handler>
|
||||
std::invoke_result_t<Handler, IReadWriteTransaction &> runAndCommit(IReadWriteTransaction &txn,
|
||||
Handler &&handler,
|
||||
bool &duplicate) {
|
||||
Uuid clientId, requestId;
|
||||
auto readonly = handler.isReadOnly();
|
||||
auto idem = !readonly && operation_.needIdempotent(clientId, requestId);
|
||||
if (idem) {
|
||||
OperationRecorder::server().addIdempotentCount();
|
||||
IDEMPOTENT_CHECK();
|
||||
auto result = co_await handler(txn);
|
||||
if (result) {
|
||||
CO_RETURN_ON_ERROR(co_await Idempotent::store(txn, clientId, requestId, result));
|
||||
CO_RETURN_ON_ERROR(co_await txn.commit());
|
||||
} else if (ErrorHandling::success(result) || !ErrorHandling::retryable(result.error())) {
|
||||
// this is final result, discard other modifications and save result
|
||||
txn.reset();
|
||||
IDEMPOTENT_CHECK();
|
||||
CO_RETURN_ON_ERROR(co_await Idempotent::store(txn, clientId, requestId, result));
|
||||
CO_RETURN_ON_ERROR(co_await txn.commit());
|
||||
}
|
||||
co_return result;
|
||||
} else {
|
||||
auto result = co_await handler(txn);
|
||||
if (!result.hasError() && !readonly) {
|
||||
CO_RETURN_ON_ERROR(co_await txn.commit());
|
||||
}
|
||||
co_return result;
|
||||
}
|
||||
}
|
||||
|
||||
std::string describe() const {
|
||||
if constexpr (std::is_base_of_v<ReqBase, ReqInfo>) {
|
||||
return fmt::format("{}{}", operation_.name(), req_);
|
||||
} else {
|
||||
return std::string(operation_.name());
|
||||
}
|
||||
}
|
||||
|
||||
MetaStore::Op<Rsp> &operation_;
|
||||
const ReqInfo &req_;
|
||||
std::optional<SteadyTime> deadline_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
364
src/meta/store/PathResolve.cc
Normal file
364
src/meta/store/PathResolve.cc
Normal file
@@ -0,0 +1,364 @@
|
||||
#include "meta/store/PathResolve.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <fcntl.h>
|
||||
#include <folly/Overload.h>
|
||||
#include <folly/ScopeGuard.h>
|
||||
#include <folly/functional/Partial.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <numeric>
|
||||
#include <variant>
|
||||
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/FaultInjection.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "meta/components/AclCache.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
namespace {
|
||||
monitor::DistributionRecorder pathComponentsDist("meta_server.path_components");
|
||||
monitor::DistributionRecorder pathComponentsDistUser("meta_server.path_components_by_user");
|
||||
} // namespace
|
||||
|
||||
using ResolveResult = PathResolveOp::ResolveResult;
|
||||
using ResolveRangeResult = PathResolveOp::ResolveRangeResult;
|
||||
|
||||
/**
 * Obtain enough information about `parent` to know its ACL, and make sure it is a directory.
 *
 * Sources, in order of preference:
 *  - the DirEntry itself when the caller already holds one,
 *  - DirEntry::root() for the root inode,
 *  - the ACL cache (entries accepted according to `cacheTime`),
 *  - a snapshot load of the Inode.
 * Directory ACLs obtained from a DirEntry or from a loaded Inode are written to the cache unless
 * `cacheTime` is zero.
 *
 * @param user currently not consulted by this function.
 * @return the parent description, MetaCode::kNotDirectory when it is not a directory, or the
 *         error of the inode load (kNotFound when the inode does not exist).
 */
static CoTryTask<std::variant<std::pair<InodeId, Acl>, Inode, DirEntry>> loadParentAcl(
    IReadOnlyTransaction &txn,
    AclCache &cache,
    const UserInfo &user,
    const std::variant<InodeId, DirEntry> &parent,
    Duration cacheTime) {
  using ParentInfo = std::variant<std::pair<InodeId, Acl>, Inode, DirEntry>;

  auto parentId = getInodeId(parent);
  const bool cacheEnabled = cacheTime.count() != 0;
  ParentInfo info;

  if (const auto *entry = std::get_if<DirEntry>(&parent)) {
    // the DirEntry already carries the ACL, no load needed
    info = *entry;
    if (cacheEnabled && entry->isDirectory()) {
      cache.set(parentId, getDirectoryAcl(info));
    }
  } else if (parentId == InodeId::root()) {
    info = DirEntry::root();
  } else if (auto cachedAcl = cache.get(parentId, cacheTime); cachedAcl.has_value()) {
    info = std::pair<InodeId, Acl>{parentId, *cachedAcl};
  } else {
    auto loaded = (co_await Inode::snapshotLoad(txn, parentId)).then(checkMetaFound<Inode>);
    CO_RETURN_ON_ERROR(loaded);
    if (cacheEnabled && loaded->isDirectory()) {
      cache.set(parentId, loaded->acl);
    }
    info = std::move(*loaded);
  }

  if (getInodeType(info) != InodeType::Directory) {
    co_return makeError(MetaCode::kNotDirectory);
  }
  co_return info;
}
|
||||
|
||||
/**
 * Same as loadParentAcl, but additionally requires that `user` holds EXEC (search) permission on
 * the parent directory; the permission error is returned otherwise.
 */
static CoTryTask<std::variant<std::pair<InodeId, Acl>, Inode, DirEntry>> loadAndCheckParentAcl(
    IReadOnlyTransaction &txn,
    AclCache &cache,
    const UserInfo &user,
    const std::variant<InodeId, DirEntry> &parent,
    Duration cacheTime) {
  auto parentInfo = co_await loadParentAcl(txn, cache, user, parent, cacheTime);
  CO_RETURN_ON_ERROR(parentInfo);

  auto searchable = getDirectoryAcl(*parentInfo).checkPermission(user, AccessType::EXEC);
  CO_RETURN_ON_ERROR(searchable);

  co_return parentInfo;
}
|
||||
|
||||
/** Report how many path components this op resolved; ops that resolved none are not sampled. */
PathResolveOp::~PathResolveOp() {
  if (pathComponents_ == 0) {
    return;
  }
  pathComponentsDist.addSample(pathComponents_);
  pathComponentsDistUser.addSample(pathComponents_, {{"uid", folly::to<std::string>(user_.uid.toUnderType())}});
}
|
||||
|
||||
/**
 * Resolve `path` to an Inode.
 *
 * When no path is given, or the path is empty and AT_EMPTY_PATH is set, `path.parent` itself is
 * loaded. Otherwise the path is resolved to a DirEntry first and the inode it refers to is loaded.
 *
 * @param checkRefCnt when true, an inode whose nlink is 0 is reported as MetaCode::kNotFound.
 */
CoTryTask<Inode> PathResolveOp::inode(const PathAt &path, AtFlags flags, bool checkRefCnt) {
  Result<Inode> loaded = makeError(MetaCode::kFoundBug);

  const bool useParentItself = !path.path.has_value() || (path.path->empty() && flags.contains(AT_EMPTY_PATH));
  if (useParentItself) {
    loaded = (co_await Inode::snapshotLoad(txn_, path.parent)).then(checkMetaFound<Inode>);
  } else {
    auto entry = co_await this->dirEntry(path, flags);
    CO_RETURN_ON_ERROR(entry);
    // when the last symlink is followed, the resolved entry can no longer be a symlink
    assert(!flags.followLastSymlink() || !entry->isSymlink());
    loaded = co_await entry->snapshotLoadInode(txn_);
    // XLOGF_IF(DFATAL, (inode.hasValue() && inode->nlink == 0), "entry {} -> inode {}, nlink == 0", *entry, *inode);
  }

  const bool alreadyRemoved = !loaded.hasError() && checkRefCnt && loaded->nlink == 0;
  if (alreadyRemoved) {
    co_return makeError(MetaCode::kNotFound,
                        fmt::format("path {}, inode {} is removed", path, loaded->id.toHexString()));
  }
  co_return loaded;
}
|
||||
|
||||
/** Resolve `path` to a DirEntry; a PathAt without a path component is rejected with kInvalidArg. */
CoTryTask<DirEntry> PathResolveOp::dirEntry(const PathAt &path, AtFlags flags) {
  if (path.path.has_value()) {
    co_return co_await dirEntry(path.parent, *path.path, flags.followLastSymlink());
  }
  co_return makeError(StatusCode::kInvalidArg, "path not set");
}
|
||||
|
||||
/**
 * Resolve `path` relative to `parent` and return the DirEntry of its last component.
 * A trailing symlink is followed when `followLastSymlink` is set.
 * Returns MetaCode::kNotFound when the last component does not exist.
 */
CoTryTask<DirEntry> PathResolveOp::dirEntry(InodeId parent, const Path &path, bool followLastSymlink) {
  auto resolved = co_await this->path(parent, path);
  CO_RETURN_ON_ERROR(resolved);

  const bool endsWithSymlink = resolved->dirEntry.has_value() && resolved->dirEntry->isSymlink();
  if (endsWithSymlink && followLastSymlink) {
    XLOGF(DBG, "Resolve dir entry get symlink, follow it.");
    resolved = co_await this->symlink(*resolved->dirEntry);
    CO_RETURN_ON_ERROR(resolved);
  }

  auto &found = resolved->dirEntry;
  if (!found.has_value()) {
    co_return makeError(MetaCode::kNotFound);
  }
  co_return std::move(*found);
}
|
||||
|
||||
/**
 * Build a ResolveResult for a directory known only by its inode id: load the inode, load the
 * DirEntry it reports for itself, then load the ACL information of that entry's parent.
 * No permission check is performed on the parent here.
 */
CoTryTask<ResolveResult> PathResolveOp::byDirectoryInodeId(InodeId inodeId) {
  auto dirInode = (co_await Inode::snapshotLoad(txn_, inodeId)).then(checkMetaFound<Inode>);
  CO_RETURN_ON_ERROR(dirInode);

  auto selfEntry = co_await dirInode->snapshotLoadDirEntry(txn_);
  CO_RETURN_ON_ERROR(selfEntry);

  auto parentInfo = co_await loadParentAcl(txn_, aclCache_, user_, selfEntry->parent, aclCacheTime_);
  CO_RETURN_ON_ERROR(parentInfo);

  co_return ResolveResult(*parentInfo, *selfEntry);
}
|
||||
|
||||
/** Resolve `path`; a PathAt without a path component is rejected with kInvalidArg. */
CoTryTask<ResolveResult> PathResolveOp::path(const PathAt &path, AtFlags flags) {
  if (path.path.has_value()) {
    co_return co_await this->path(path.parent, *path.path, flags.followLastSymlink());
  }
  co_return makeError(StatusCode::kInvalidArg, "path not set");
}
|
||||
|
||||
/**
|
||||
* Resolve path, return parentInode of last path component and dirEntry if presents.
|
||||
* Don't follow last symlink link.
|
||||
*/
|
||||
CoTryTask<ResolveResult> PathResolveOp::path(InodeId parent, const Path &path) {
|
||||
XLOGF(DBG, "Resolve path {}/{}", parent, path);
|
||||
auto begin = path.begin();
|
||||
auto resolveResult = co_await this->pathRange(parent, begin, path.end());
|
||||
CO_RETURN_ON_ERROR(resolveResult);
|
||||
|
||||
XLOGF(DBG,
|
||||
"Resolve path {}, {} components, {} found, {} missing",
|
||||
path,
|
||||
std::distance(path.begin(), path.end()),
|
||||
std::distance(path.begin(), begin),
|
||||
std::distance(begin, path.end()));
|
||||
|
||||
if (begin == path.end() || ++begin == path.end()) {
|
||||
co_return resolveResult;
|
||||
} else {
|
||||
// some middle path components missing, return kNotFound
|
||||
co_return makeError(MetaCode::kNotFound);
|
||||
}
|
||||
}
|
||||
|
||||
/** Like path(parent, path), but follows a symlink in the last position when `followLastSymlink` is set. */
CoTryTask<ResolveResult> PathResolveOp::path(InodeId parent, const Path &path, bool followLastSymlink) {
  auto resolved = co_await this->path(parent, path);
  CO_RETURN_ON_ERROR(resolved);

  if (followLastSymlink) {
    auto &last = resolved->dirEntry;
    if (last.has_value() && last->isSymlink()) {
      co_return co_await symlink(*last);
    }
  }
  co_return resolved;
}
|
||||
|
||||
/**
 * Resolve as much of `path` as possible and report the unresolved remainder.
 * The remaining components are joined with operator/ into `ResolveRangeResult::missing`.
 * A PathAt without a path component is rejected with kInvalidArg.
 */
CoTryTask<ResolveRangeResult> PathResolveOp::pathRange(const PathAt &path) {
  if (!path.path.has_value()) {
    co_return makeError(StatusCode::kInvalidArg, "path not set");
  }

  auto cursor = path.path->begin();
  auto last = path.path->end();
  auto resolved = co_await pathRange(path.parent, cursor, last);
  CO_RETURN_ON_ERROR(resolved);

  // `cursor` now points at the first component that was not resolved
  co_return ResolveRangeResult(std::move(*resolved), std::accumulate(cursor, last, Path(), std::divides()));
}
|
||||
|
||||
/**
|
||||
* Walk along and resolve path components from begin to end.
|
||||
* If parent doesn't exists or path is empty, return kNotFound.
|
||||
* If parent is symlink, return kNotDirectory, if any middle path component points to a symlink, try to resolve it.
|
||||
* If parent or any middle path component is symlink that points to a non-exist path, return kNotFound.
|
||||
* If parent or any middle path component is a file, return kNotDirectory.
|
||||
* If parent or any middle path component points to a deleted directory, return kNotFound.
|
||||
* If user does not have search permission on directory, return kNoPermission.
|
||||
* If there is too much symlink during path resolution, return kTooManySymlinks.
|
||||
* If any path component is missing, begin will points to corresponding path component, and returns its parent Inode.
|
||||
* If resolution success, begin will point to end and returns last parent Inode and DirEntry.
|
||||
*/
|
||||
CoTryTask<ResolveResult> PathResolveOp::pathRange(InodeId parentId,
|
||||
Path::const_iterator &begin,
|
||||
const Path::const_iterator &end) {
|
||||
SCOPE_EXIT {
|
||||
auto dis = std::distance(begin, end);
|
||||
XLOGF_IF(DBG,
|
||||
dis,
|
||||
"PathResolveOp::pathRange {} components missing, {}!",
|
||||
dis,
|
||||
std::accumulate(begin, end, Path(), std::divides()));
|
||||
};
|
||||
std::variant<InodeId, DirEntry> parent(parentId);
|
||||
if (begin == end) {
|
||||
co_return makeError(MetaCode::kNotFound);
|
||||
}
|
||||
if (*begin == "/") {
|
||||
// lookup from root;
|
||||
parent = InodeId::root();
|
||||
if (++begin == end) {
|
||||
if (trace_) {
|
||||
*trace_ = "/";
|
||||
}
|
||||
// special case: path range only contains "/", just load root inode and make a fake directory entry
|
||||
co_return ResolveResult(DirEntry::root(), DirEntry::root());
|
||||
}
|
||||
}
|
||||
|
||||
FAULT_INJECTION_SET_FACTOR(std::distance(begin, end));
|
||||
while (begin != end) {
|
||||
// do not need to handle "."
|
||||
if (begin->filename_is_dot()) {
|
||||
if (++begin == end) {
|
||||
co_return co_await pathComponent(parent, ".");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// resolve current path component
|
||||
auto resolveResult = co_await pathComponent(parent, *begin);
|
||||
CO_RETURN_ON_ERROR(resolveResult);
|
||||
if (!resolveResult->dirEntry.has_value()) {
|
||||
// dirEntry not found, just means this path component is missing,
|
||||
// return parentInode and let caller decide create missing path components or not
|
||||
co_return resolveResult;
|
||||
}
|
||||
|
||||
if (++begin == end) {
|
||||
// this is last component, return here
|
||||
co_return resolveResult;
|
||||
}
|
||||
|
||||
// middle path component
|
||||
if (resolveResult->dirEntry->isSymlink()) {
|
||||
resolveResult = co_await this->symlink(*resolveResult->dirEntry);
|
||||
CO_RETURN_ON_ERROR(resolveResult);
|
||||
if (!resolveResult->dirEntry.has_value()) {
|
||||
co_return makeError(MetaCode::kNotFound);
|
||||
}
|
||||
}
|
||||
|
||||
if (resolveResult->dirEntry->isFile()) {
|
||||
co_return makeError(MetaCode::kNotDirectory);
|
||||
}
|
||||
|
||||
// update parent and continue.
|
||||
parent = std::move(resolveResult->dirEntry.value());
|
||||
}
|
||||
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve a single path component.
|
||||
* If parent doesn't exist, return kNotFound or kInconsistent.
|
||||
* If parent exists but is deleted, return kNotFound.
|
||||
* If parent is file or symlink, return kNotDirectory.
|
||||
* If parent is directory, but user doesn't have search permission, return kNoPermission.
|
||||
* Else return parentInode and dirEntry if exists.
|
||||
*/
|
||||
CoTryTask<ResolveResult> PathResolveOp::pathComponent(const std::variant<InodeId, DirEntry> &parent, const Path &name) {
|
||||
// todo: For each directory, we need load it's Inode to check permission,
|
||||
// this adds performance overhead to path resolution.
|
||||
// A simple way to mitigate this is cache Inode permission information,
|
||||
// if we can tolerate chmod doesn't make effect for several seconds.
|
||||
auto parentId = getInodeId(parent);
|
||||
if (trace_) {
|
||||
if (parentId == InodeId::root()) {
|
||||
// todo: many for other root?
|
||||
*trace_ = "/";
|
||||
}
|
||||
*trace_ /= name;
|
||||
}
|
||||
|
||||
if (!name.filename_is_dot()) pathComponents_++;
|
||||
|
||||
auto result = co_await loadAndCheckParentAcl(txn_, aclCache_, user_, parent, aclCacheTime_);
|
||||
CO_RETURN_ON_ERROR(result);
|
||||
if (name.filename_is_dot()) {
|
||||
DirEntry dirEntry = DirEntry::newDirectory(parentId, ".", parentId, getDirectoryAcl(*result));
|
||||
co_return ResolveResult(*result, dirEntry);
|
||||
} else if (name.filename_is_dot_dot()) {
|
||||
if (std::holds_alternative<Inode>(*result)) {
|
||||
auto &parent = std::get<Inode>(*result);
|
||||
auto ppId = parent.asDirectory().parent;
|
||||
auto parentAcl = parent.acl;
|
||||
co_return ResolveResult(std::move(parent), DirEntry::newDirectory(parentId, "..", ppId, parentAcl));
|
||||
} else {
|
||||
auto loadInodeResult = (co_await Inode::snapshotLoad(txn_, getInodeId(*result))).then(checkMetaFound<Inode>);
|
||||
CO_RETURN_ON_ERROR(loadInodeResult);
|
||||
auto &parent = *loadInodeResult;
|
||||
auto ppId = parent.asDirectory().parent;
|
||||
auto parentAcl = parent.acl;
|
||||
co_return ResolveResult(std::move(parent), DirEntry::newDirectory(parentId, "..", ppId, parentAcl));
|
||||
}
|
||||
} else {
|
||||
auto loadEntryResult = co_await DirEntry::snapshotLoad(txn_, parentId, name.native());
|
||||
CO_RETURN_ON_ERROR(loadEntryResult);
|
||||
co_return ResolveResult(std::move(*result), std::move(loadEntryResult.value()));
|
||||
}
|
||||
}
|
||||
|
||||
/** Load the inode that `entry` refers to and return the target path stored in its symlink data. */
static CoTryTask<Path> loadSymLinkTarget(IReadOnlyTransaction &txn, const DirEntry &entry) {
  auto linkInode = co_await entry.snapshotLoadInode(txn);
  CO_RETURN_ON_ERROR(linkInode);
  auto &link = linkInode.value().asSymlink();
  co_return std::move(link.target);
}
|
||||
|
||||
/**
 * Follow the symlink `entry` until it resolves to something that is not a symlink.
 *
 * Each hop loads the symlink target and resolves it relative to the directory containing the
 * symlink. Two limits apply, both reported as MetaCode::kTooManySymlinks:
 *  - `maxSymlinkDepth_` bounds how deeply symlink() calls may nest,
 *  - `maxSymlinkCount_` bounds the total number of symlinks followed by this op.
 *
 * @return the resolution of the final target; its dirEntry is empty when the target's last
 *         component does not exist.
 */
CoTryTask<ResolveResult> PathResolveOp::symlink(DirEntry entry) {
  // Register the decrement before checking the limit, so depth_ is restored on every exit path,
  // including the "too deep" error below.
  ++depth_;
  SCOPE_EXIT { depth_--; };
  if (depth_ > maxSymlinkDepth_) {
    co_return makeError(MetaCode::kTooManySymlinks);
  }

  while (true) {
    if (++symlinkCnt_ > maxSymlinkCount_) {
      co_return makeError(MetaCode::kTooManySymlinks);
    }
    auto symlinkTarget = co_await loadSymLinkTarget(txn_, entry);
    CO_RETURN_ON_ERROR(symlinkTarget);
    if (trace_) {
      // the symlink's own name is replaced by whatever its target resolves to
      trace_->remove_filename();
    }
    auto resolveResult = co_await this->path(entry.parent, *symlinkTarget);
    CO_RETURN_ON_ERROR(resolveResult);
    if (!resolveResult->dirEntry.has_value() || !resolveResult->dirEntry->isSymlink()) {
      co_return resolveResult;
    }
    // target is itself a symlink, keep following
    entry = std::move(*resolveResult->dirEntry);
  }
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
116
src/meta/store/PathResolve.h
Normal file
116
src/meta/store/PathResolve.h
Normal file
@@ -0,0 +1,116 @@
|
||||
#pragma once
|
||||
|
||||
#include <folly/Overload.h>
|
||||
#include <folly/Utility.h>
|
||||
#include <gtest/gtest_prod.h>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Duration.h"
|
||||
#include "common/utils/Path.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "meta/components/AclCache.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/**
|
||||
* Path Resolution.
|
||||
*
|
||||
* Note: PathResolveOp always use snapshotLoad, so it won't add any key into read conflict set.
|
||||
* User should add keys into read conflict set manually if needed.
|
||||
*/
|
||||
class PathResolveOp : folly::NonCopyableNonMovable {
|
||||
public:
|
||||
struct ResolveResult {
|
||||
// for parent, we may already got it's Inode, or just dirEntry points to it, or just cached acl
|
||||
std::variant<std::pair<InodeId, Acl>, Inode, DirEntry> parent;
|
||||
std::optional<DirEntry> dirEntry;
|
||||
|
||||
ResolveResult(std::variant<std::pair<InodeId, Acl>, Inode, DirEntry> parent, std::optional<DirEntry> dirEntry)
|
||||
: parent(std::move(parent)),
|
||||
dirEntry(std::move(dirEntry)) {}
|
||||
|
||||
InodeId getParentId() const { return getInodeId(parent); }
|
||||
Acl getParentAcl() const { return getDirectoryAcl(parent); }
|
||||
CoTryTask<Inode> getParentInode(kv::IReadOnlyTransaction &txn) const {
|
||||
if (std::holds_alternative<Inode>(parent)) {
|
||||
co_return std::get<Inode>(parent);
|
||||
} else if (std::holds_alternative<DirEntry>(parent)) {
|
||||
co_return co_await std::get<DirEntry>(parent).snapshotLoadInode(txn);
|
||||
} else {
|
||||
auto parentId = std::get<std::pair<InodeId, Acl>>(parent).first;
|
||||
co_return (co_await Inode::snapshotLoad(txn, parentId)).then(checkMetaFound<Inode>);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct ResolveRangeResult : ResolveResult {
|
||||
Path missing;
|
||||
ResolveRangeResult(ResolveResult result, Path missing)
|
||||
: ResolveResult(std::move(result)),
|
||||
missing(missing) {}
|
||||
};
|
||||
|
||||
PathResolveOp(IReadOnlyTransaction &txn, AclCache &aclCache, const UserInfo &userInfo, Path *trace = nullptr)
|
||||
: PathResolveOp(txn, aclCache, userInfo, trace, 4, 8, 5_s) {}
|
||||
PathResolveOp(IReadOnlyTransaction &txn,
|
||||
AclCache &aclCache,
|
||||
const UserInfo &userInfo,
|
||||
Path *trace,
|
||||
size_t maxSymlinkCount,
|
||||
size_t maxSymlinkDepth,
|
||||
Duration aclCacheTime)
|
||||
: txn_(txn),
|
||||
user_(userInfo),
|
||||
aclCache_(aclCache),
|
||||
trace_(trace),
|
||||
depth_(0),
|
||||
symlinkCnt_(0),
|
||||
maxSymlinkCount_(maxSymlinkCount),
|
||||
maxSymlinkDepth_(maxSymlinkDepth),
|
||||
aclCacheTime_(aclCacheTime),
|
||||
pathComponents_(0) {}
|
||||
~PathResolveOp();
|
||||
|
||||
CoTryTask<Inode> inode(const PathAt &path, AtFlags flags, bool checkRefCnt);
|
||||
CoTryTask<DirEntry> dirEntry(const PathAt &path, AtFlags flags);
|
||||
|
||||
CoTryTask<ResolveResult> path(const PathAt &path, AtFlags flags);
|
||||
CoTryTask<ResolveResult> byDirectoryInodeId(InodeId inodeId);
|
||||
CoTryTask<ResolveRangeResult> pathRange(const PathAt &path);
|
||||
|
||||
CoTryTask<ResolveResult> symlink(DirEntry entry);
|
||||
|
||||
private:
|
||||
template <typename>
|
||||
FRIEND_TEST(TestResolve, ResolveComponent);
|
||||
|
||||
CoTryTask<DirEntry> dirEntry(InodeId parent, const Path &path, bool followLastSymlink);
|
||||
|
||||
CoTryTask<ResolveResult> path(InodeId parent, const Path &path);
|
||||
CoTryTask<ResolveResult> path(InodeId parent, const Path &path, bool followLastSymlink);
|
||||
CoTryTask<ResolveResult> pathComponent(const std::variant<InodeId, DirEntry> &parent, const Path &name);
|
||||
CoTryTask<ResolveResult> pathRange(InodeId parent, Path::const_iterator &begin, const Path::const_iterator &end);
|
||||
|
||||
IReadOnlyTransaction &txn_;
|
||||
const UserInfo &user_;
|
||||
AclCache &aclCache_;
|
||||
Path *trace_;
|
||||
|
||||
size_t depth_;
|
||||
size_t symlinkCnt_;
|
||||
|
||||
size_t maxSymlinkCount_;
|
||||
size_t maxSymlinkDepth_;
|
||||
Duration aclCacheTime_;
|
||||
|
||||
size_t pathComponents_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
78
src/meta/store/Utils.h
Normal file
78
src/meta/store/Utils.h
Normal file
@@ -0,0 +1,78 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <fcntl.h>
|
||||
#include <folly/Overload.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "client/mgmtd/ICommonMgmtdClient.h"
|
||||
#include "common/utils/ConfigBase.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/StatusCode.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Inode.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
template <typename T>
|
||||
inline InodeId getInodeId(T &&val) {
|
||||
return folly::variant_match(
|
||||
std::forward<T>(val),
|
||||
[](const Inode &inode) { return inode.id; },
|
||||
[](const DirEntry &entry) { return entry.id; },
|
||||
[](const InodeId &id) { return id; },
|
||||
[](const std::pair<InodeId, Acl> &cachedAcl) { return cachedAcl.first; });
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline Acl getDirectoryAcl(T &&val) {
|
||||
return folly::variant_match(
|
||||
std::forward<T>(val),
|
||||
[](const Inode &inode) {
|
||||
assert(inode.isDirectory());
|
||||
return inode.acl;
|
||||
},
|
||||
[](const DirEntry &entry) {
|
||||
assert(entry.isDirectory() && entry.dirAcl.has_value());
|
||||
return *entry.dirAcl;
|
||||
},
|
||||
[](const std::pair<InodeId, Acl> &cachedAcl) { return cachedAcl.second; });
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline InodeType getInodeType(T &&val) {
|
||||
return folly::variant_match(
|
||||
std::forward<T>(val),
|
||||
[](const Inode &inode) { return inode.getType(); },
|
||||
[](const DirEntry &entry) { return entry.type; },
|
||||
[](const std::pair<InodeId, Acl> &) { return InodeType::Directory; });
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline Result<T> checkMetaFound(std::optional<T> val) {
|
||||
if (!val.has_value()) {
|
||||
return makeError(MetaCode::kNotFound);
|
||||
}
|
||||
return std::move(val.value());
|
||||
}
|
||||
|
||||
/**
 * Tell whether `nodeId` is the active META node with the smallest node id in the current routing info.
 * Returns false when no routing info is available or no active META node is listed.
 */
inline bool isFirstMeta(client::ICommonMgmtdClient &mgmtd, flat::NodeId nodeId) {
  auto routing = mgmtd.getRoutingInfo();
  if (!routing) {
    return false;
  }

  auto metaNodes = routing->getNodeBy(flat::selectNodeByType(flat::NodeType::META) && flat::selectActiveNode());
  auto byNodeId = [](auto &lhs, auto &rhs) { return lhs.app.nodeId < rhs.app.nodeId; };
  auto lowest = std::min_element(metaNodes.begin(), metaNodes.end(), byNodeId);
  if (lowest == metaNodes.end()) {
    return false;
  }
  return lowest->app.nodeId == nodeId;
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
751
src/meta/store/ops/BatchOperation.cc
Normal file
751
src/meta/store/ops/BatchOperation.cc
Normal file
@@ -0,0 +1,751 @@
|
||||
#include "BatchOperation.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <bits/ranges_algo.h>
|
||||
#include <boost/iterator/transform_iterator.hpp>
|
||||
#include <cassert>
|
||||
#include <exception>
|
||||
#include <fcntl.h>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/Range.h>
|
||||
#include <folly/ScopeGuard.h>
|
||||
#include <folly/Synchronized.h>
|
||||
#include <folly/experimental/coro/Collect.h>
|
||||
#include <folly/experimental/coro/CurrentExecutor.h>
|
||||
#include <folly/functional/Partial.h>
|
||||
#include <folly/futures/Future.h>
|
||||
#include <folly/io/async/Request.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <ranges>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/OptionalUtils.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/StatusCode.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/core/user/User.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "meta/event/Event.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/Operation.h"
|
||||
#include "meta/store/ops/SetAttr.h"
|
||||
|
||||
// Hands `result` to CO_RETURN_ERROR only when it holds an error whose status code is of type
// StatusCodeType::Transaction; successes and every other kind of error fall through.
#define CO_RETURN_ON_TXN_ERROR(result)                                                      \
  do {                                                                                      \
    auto &&_r = (result);                                                                   \
    const bool _txnFailure =                                                                \
        _r.hasError() && StatusCode::typeOf(_r.error().code()) == StatusCodeType::Transaction; \
    if (_txnFailure) {                                                                      \
      CO_RETURN_ERROR(_r);                                                                  \
    }                                                                                       \
  } while (0)
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
namespace {
|
||||
monitor::CountRecorder batchCnt("meta_server.batch_op_size");
|
||||
}
|
||||
extern monitor::CountRecorder openWrite;
|
||||
|
||||
/** BatchedOp */
|
||||
CoTryTask<Inode> BatchedOp::run(IReadWriteTransaction &txn) {
|
||||
auto dist = co_await distributor().checkOnServer(txn, inodeId_);
|
||||
CO_RETURN_ON_ERROR(dist);
|
||||
auto [ok, versionstamp] = *dist;
|
||||
if (!ok) {
|
||||
XLOGF(INFO, "inode {} not on current server, need retry", inodeId_);
|
||||
co_return makeError(MetaCode::kBusy, "inode not on server, retry");
|
||||
}
|
||||
|
||||
auto inode = (co_await Inode::snapshotLoad(txn, inodeId_)).then(checkMetaFound<Inode>);
|
||||
CO_RETURN_ON_ERROR(inode);
|
||||
|
||||
// sanity check for file length, if we hold the lock, and versionstamp not changed file length shouldn't changed
|
||||
if (inode->isFile()) {
|
||||
if (versionstamp != versionstamp_) {
|
||||
currLength_ = inode->asFile().getVersionedLength();
|
||||
nextLength_ = std::nullopt;
|
||||
versionstamp_ = versionstamp;
|
||||
}
|
||||
|
||||
if (currLength_ != inode->asFile().getVersionedLength() && nextLength_ != inode->asFile().getVersionedLength()) {
|
||||
// we should never see this if all meta server is up to date
|
||||
XLOGF(DFATAL,
|
||||
"file {} length updated during operation, {} != {}",
|
||||
*currLength_,
|
||||
inode->asFile().getVersionedLength());
|
||||
co_return makeError(MetaCode::kBusy, "length updated during operation, retry");
|
||||
}
|
||||
}
|
||||
|
||||
// handle all sync and close operation
|
||||
auto r1 = co_await syncAndClose(txn, *inode);
|
||||
CO_RETURN_ON_ERROR(r1);
|
||||
|
||||
auto r2 = co_await setAttr(txn, *inode);
|
||||
CO_RETURN_ON_ERROR(r2);
|
||||
|
||||
auto r3 = co_await create(txn, *inode);
|
||||
CO_RETURN_ON_ERROR(r3);
|
||||
|
||||
auto dirty = *r1 || *r2 || *r3;
|
||||
if (dirty) {
|
||||
// NOTE: add inode into read conflict set
|
||||
CO_RETURN_ON_ERROR(co_await inode->addIntoReadConflict(txn));
|
||||
CO_RETURN_ON_ERROR(co_await inode->store(txn));
|
||||
}
|
||||
|
||||
co_return *inode;
|
||||
}
|
||||
|
||||
/**
 * Merge all batched sync and close requests into `inode`.
 *
 * Errors of an individual request are recorded on that request's waiter and do not abort the
 * batch. Sessions collected by the close requests are removed in `txn`. When any request asks
 * for a length update (or a truncate happened), the file length is queried and, if it differs
 * from the stored one, written into the inode together with updated timestamps.
 *
 * @return true when `inode` was modified and needs to be stored.
 */
CoTryTask<bool> BatchedOp::syncAndClose(IReadWriteTransaction &txn, Inode &inode) {
  std::vector<FileSession> closedSessions;
  // initial hint length
  std::optional<VersionedLength> hint = meta::VersionedLength{0, 0};
  bool needLength = false;
  bool truncated = false;
  bool changed = false;

  // merge all requests
  for (auto &waiter : syncs_) {
    auto outcome = co_await sync(inode, waiter.get().req, needLength, truncated, hint);
    if (!outcome.hasError()) {
      changed |= *outcome;
      continue;
    }
    waiter.get().result = makeError(std::move(outcome.error()));
  }
  for (auto &waiter : closes_) {
    auto outcome = co_await close(inode, waiter.get().req, needLength, hint, closedSessions);
    if (!outcome.hasError()) {
      changed |= *outcome;
      continue;
    }
    waiter.get().result = makeError(std::move(outcome.error()));
  }

  if (truncated) {
    // ignore hint length when truncate happened
    hint = std::nullopt;
    needLength = true;
  }

  // remove sessions
  for (auto &session : closedSessions) {
    CO_RETURN_ON_ERROR(co_await session.remove(txn));
  }

  if (!needLength) {
    // we don't need updateLength, just return
    co_return changed;
  }

  if (!inode.isFile()) {
    XLOGF(DFATAL, "{} updateLength but not file, shouldn't happen", inode);
    co_return makeError(MetaCode::kFoundBug, "updateLength but not file");
  }

  auto newLength = co_await queryLength(inode, hint, truncated);
  CO_RETURN_ON_ERROR(newLength);
  // remembered so that BatchedOp::run accepts this length on a later attempt
  nextLength_ = *newLength;

  if (*newLength == inode.asFile().getVersionedLength()) {
    XLOGF(DBG,
          "{} length not changed, length {} == {}, {} {}",
          inode.id,
          *newLength,
          inode.asFile().getVersionedLength(),
          *newLength != inode.asFile().getVersionedLength(),
          *newLength == inode.asFile().getVersionedLength());
    co_return changed;
  }

  // the new length must never be older than the stored one
  XLOGF_IF(FATAL,
           (newLength->truncateVer < inode.asFile().truncateVer ||
            (newLength->truncateVer == inode.asFile().truncateVer && newLength->length < inode.asFile().length)),
           "file {}, newLength {} currLength {}",
           inode.id,
           *newLength,
           inode.asFile().getVersionedLength());
  XLOGF(DBG, "{} changed, {} != {}", inode.id, *newLength, inode.asFile().getVersionedLength());

  SetAttr::update(inode.mtime, UtcClock::now(), config().time_granularity(), true);
  if (newLength->truncateVer != inode.asFile().truncateVer) {
    SetAttr::update(inode.ctime, UtcClock::now(), config().time_granularity(), true);
  }

  inode.asFile().setVersionedLength(*newLength);
  changed = true;

  co_return changed;
}
|
||||
|
||||
/**
 * Fold one SyncReq into the batch state.
 *
 * Validates the request, updates atime/mtime (and ctime when the request reports a truncate) on
 * `inode`, and accumulates the batch-wide flags.
 *
 * @param updateLength in/out, set when this request asks for a length update.
 * @param truncate     in/out, set when this request reports a truncate.
 * @param hintLength   in/out, merged with the request's length hint when it asks for a length update.
 * @return true when a timestamp on `inode` was changed.
 */
CoTryTask<bool> BatchedOp::sync(Inode &inode,
                                const SyncReq &req,
                                bool &updateLength,
                                bool &truncate,
                                std::optional<VersionedLength> &hintLength) {
  // check request
  CO_RETURN_ON_ERROR(req.valid());
  if (req.inode != PathAt(inodeId_)) {
    XLOGF(DFATAL, "SyncReq {} shouldn't in batch of {}", req, inodeId_);
    co_return makeError(MetaCode::kFoundBug, "Invalid batchOp");
  }

  const bool touchesLength = req.updateLength || req.truncated || req.lengthHint;
  if (touchesLength && !inode.isFile()) {
    co_return makeError(MetaCode::kNotFile, "update length but not file");
  }
  // a hint can never be newer than the truncate version stored in the inode
  if (req.lengthHint && req.lengthHint->truncateVer > inode.asFile().truncateVer) {
    auto msg = fmt::format("inode {} hint truncateVer {} > current truncateVer {}",
                           inodeId_,
                           req.lengthHint->truncateVer,
                           inode.asFile().truncateVer);
    XLOG(DFATAL, msg);
    co_return makeError(MetaCode::kFoundBug, std::move(msg));
  }

  const auto granularity = [this] { return config().time_granularity(); };

  bool changed = false;
  changed |= SetAttr::update(inode.atime, req.atime, granularity(), true /* cmp */);
  changed |= SetAttr::update(inode.mtime, req.mtime, granularity(), true /* cmp */);
  if (req.truncated) {
    changed |= SetAttr::update(inode.ctime, req.mtime.value_or(UtcClock::now()), granularity(), true /* cmp */);
  }

  // accumulate batch-wide state
  if (req.updateLength) {
    updateLength = true;
    hintLength = VersionedLength::mergeHint(hintLength, req.lengthHint);
  }
  truncate |= req.truncated;

  co_return changed;
}
|
||||
|
||||
/**
 * Apply a single CloseReq of this batch to the in-memory inode.
 *
 * Updates atime/mtime, records whether a length update is needed (merging the
 * request's length hint), and collects the file session to be removed later.
 *
 * @return true when the inode was modified by this request.
 */
CoTryTask<bool> BatchedOp::close(Inode &inode,
                                 const CloseReq &req,
                                 bool &updateLength,
                                 std::optional<VersionedLength> &hintLength,
                                 std::vector<FileSession> &sessions) {
  CO_RETURN_ON_ERROR(req.valid());

  // the request must target the inode this batch was built for
  if (req.inode != PathAt(inodeId_)) {
    XLOGF(DFATAL, "CloseReq {} shouldn't batch of {}", req, inodeId_);
    co_return makeError(MetaCode::kFoundBug, "Invalid batchOp");
  }

  // sessions and length updates only exist for regular files
  const bool needsFile = req.session || req.updateLength;
  if (needsFile && !inode.isFile()) {
    co_return makeError(MetaCode::kNotFile);
  }

  const bool atimeChanged = SetAttr::update(inode.atime, req.atime, config().time_granularity(), true /* cmp */);
  const bool mtimeChanged = SetAttr::update(inode.mtime, req.mtime, config().time_granularity(), true /* cmp */);

  if (req.updateLength) {
    updateLength = true;
    hintLength = VersionedLength::mergeHint(hintLength, req.lengthHint);
  }

  // remember the session, the caller removes it inside the transaction
  if (req.session.has_value()) {
    sessions.push_back(FileSession::create(inode.id, *req.session));
  }

  co_return atimeChanged || mtimeChanged;
}
|
||||
|
||||
/**
 * Decide which versioned length the file inode should be updated to.
 *
 * Resolution order:
 *  1. a length cached in `nextLength_` is returned as is;
 *  2. if a hint is given (and hints are not disabled by config):
 *     - the current length is returned when it already covers the hint,
 *     - the hint is returned when it has the same truncate version and a larger length;
 *  3. otherwise the length is queried through `fileHelper()`, and the truncate version is
 *     bumped when this is a truncate or the queried length is smaller than the current one.
 *
 * @param inode      inode to resolve the length for, must be a file.
 * @param hintLength optional merged length hint; must be empty when `truncate` is set.
 * @param truncate   whether any request in the batch truncated the file.
 * @return the versioned length to store, or the error of the length query.
 */
CoTryTask<VersionedLength> BatchedOp::queryLength(const Inode &inode,
                                                  std::optional<VersionedLength> hintLength,
                                                  bool truncate) {
  XLOGF_IF(FATAL, !inode.isFile(), "not file");
  XLOGF_IF(FATAL, truncate && hintLength, "truncate but hintLength {}", *hintLength);
  if (nextLength_) {
    XLOGF(DBG, "inode {} update to cached nextLength {}", inode, *nextLength_);
    co_return *nextLength_;
  }

  auto currLength = inode.asFile().getVersionedLength();
  if (hintLength && !config().ignore_length_hint()) {
    if (currLength.truncateVer >= hintLength->truncateVer && currLength.length >= hintLength->length) {
      XLOGF(DBG, "don't need update {}, current {}, hint {}", inode.id, currLength, *hintLength);
      co_return currLength;
    }
    // Same truncate version and a longer hint: trust the hint instead of asking storage.
    // NOTE: the hint length must be compared with the current *length*; comparing it with the
    // truncate version made small files fall through to the expensive length query.
    if (hintLength->truncateVer == currLength.truncateVer && hintLength->length > currLength.length) {
      XLOGF(DBG, "update {} to hint {}, current {}", inode.id, *hintLength, currLength);
      co_return *hintLength;
    }
    XLOGF_IF(DFATAL,
             hintLength->truncateVer > currLength.truncateVer,
             "file {}, hint {} > {}!!!",
             inode.id,
             hintLength->truncateVer,
             currLength.truncateVer);
  }

  // the hint (if any) is stale or unusable, fall back to querying the real length
  XLOGF(DBG,
        "need query length for {}, current {}, hint {}, ignore hint {}, truncate {}, sync {}, close {}",
        inode.id,
        currLength,
        OptionalFmt(hintLength),
        config().ignore_length_hint(),
        truncate,
        syncs_.size(),
        closes_.size());
  auto length = co_await fileHelper().queryLength(flat::UserInfo(user_), inode);
  CO_RETURN_ON_ERROR(length);
  XLOGF(DBG, "qeury length for {}, get {}", inode.id, *length);
  // a truncate, or a file that got shorter, starts a new truncate version
  auto truncateVer =
      (truncate || *length < inode.asFile().length) ? inode.asFile().truncateVer + 1 : inode.asFile().truncateVer;
  co_return VersionedLength{*length, truncateVer};
}
|
||||
|
||||
/**
 * Apply every queued SetAttrReq to the inode.
 *
 * A request failing `SetAttr::check` only fails its own waiter. When the ACL of a
 * non-root directory changed, the directory entry pointing at it is reloaded and
 * rewritten with the new ACL.
 *
 * @return true when the inode was modified.
 */
CoTryTask<bool> BatchedOp::setAttr(IReadWriteTransaction &txn, Inode &inode) {
  bool dirty = false;
  const auto aclBefore = inode.acl;

  for (auto &ref : setattrs_) {
    auto &waiter = ref.get();
    const auto &req = waiter.req;
    if (req.path != PathAt(inodeId_)) {
      XLOGF(DFATAL, "SetAttrReq {} shouldn't in batch of {}", req, inodeId_);
      co_return makeError(MetaCode::kFoundBug, "Invalid batchOp");
    }

    auto checked = SetAttr::check(inode, req, config());
    if (checked.hasError()) {
      // only this request fails, the rest of the batch continues
      waiter.result = makeError(checked.error());
      continue;
    }
    dirty |= SetAttr::apply(inode, req, config().time_granularity(), config().dynamic_stripe_growth());
  }

  const bool dirAclChanged = inode.isDirectory() && inode.acl != aclBefore && inode.id != InodeId::root();
  if (!dirAclChanged) {
    co_return dirty;
  }

  // keep the ACL stored in the directory entry in sync with the inode
  XLOGF_IF(FATAL, !dirty, "acl changed but dirty not set");
  auto loaded = co_await inode.snapshotLoadDirEntry(txn);
  CO_RETURN_ON_ERROR(loaded);
  auto entry = DirEntry(*loaded);
  XLOGF_IF(DFATAL,
           (!inode.asDirectory().name.empty() && entry.name != inode.asDirectory().name),
           "{} != {}",
           entry.name,
           inode.asDirectory().name);
  if (inode.asDirectory().name.empty()) {
    inode.asDirectory().name = entry.name;
  }
  entry.dirAcl = inode.acl;
  CO_RETURN_ON_ERROR(co_await entry.addIntoReadConflict(txn));
  CO_RETURN_ON_ERROR(co_await entry.store(txn));

  co_return dirty;
}
|
||||
|
||||
/**
 * Execute every queued CreateReq under the parent directory `inode`.
 *
 * Requests are grouped by file name; each group is handled by one task and at most
 * 8 tasks are in flight before their results are collected.
 *
 * @return true when the parent inode was modified.
 */
CoTryTask<bool> BatchedOp::create(IReadWriteTransaction &txn, Inode &inode) {
  if (creates_.empty()) {
    co_return false;
  }

  // creating under something that is not a directory fails every request
  if (!inode.isDirectory()) {
    for (auto &ref : creates_) {
      ref.get().result = makeError(MetaCode::kNotDirectory);
    }
    co_return false;
  }

  folly::Synchronized<uint32_t> chainAllocCounter(inode.asDirectory().chainAllocCounter);

  // fast path: a single request doesn't need grouping or extra tasks
  if (creates_.size() == 1) {
    auto single = co_await create(txn, inode, chainAllocCounter, creates_.begin(), creates_.end());
    CO_RETURN_ON_ERROR(single);
    co_return SetAttr::update(inode.asDirectory().chainAllocCounter, *chainAllocCounter.rlock()) || *single;
  }

  // group the waiters by the name they want to create
  std::multimap<std::string, WaiterRef<CreateReq, CreateRsp>> byName;
  for (auto &ref : creates_) {
    const auto &name = ref.get().req.path.path;
    if (UNLIKELY(!name || name->has_parent_path())) {
      auto msg = fmt::format("inode {}, create req {}", inodeId_, ref.get().req);
      XLOG(DFATAL, msg);
      co_return makeError(MetaCode::kFoundBug, std::move(msg));
    }
    byName.emplace(name->string(), ref);
  }

  std::vector<folly::SemiFuture<Result<bool>>> pending;
  auto exec = co_await folly::coro::co_current_executor;
  bool dirty = false;

  auto toWaiter = [](auto &item) { return item.second; };
  auto groupBegin = byName.begin();
  while (groupBegin != byName.end()) {
    // [groupBegin, groupEnd) covers all requests for the same name
    auto groupEnd = byName.upper_bound(groupBegin->first);

    auto first = boost::make_transform_iterator(groupBegin, toWaiter);
    auto last = boost::make_transform_iterator(groupEnd, toWaiter);
    pending.push_back(create(txn, inode, chainAllocCounter, first, last).scheduleOn(exec).start());

    // flush when the batch is full or this was the last group
    if (pending.size() >= 8 || groupEnd == byName.end()) {
      auto results = co_await folly::coro::collectAllRange(std::exchange(pending, {}));
      for (auto &res : results) {
        CO_RETURN_ON_ERROR(res);
        dirty |= *res;
      }
    }

    groupBegin = groupEnd;
  }
  assert(pending.empty());
  dirty |= SetAttr::update(inode.asDirectory().chainAllocCounter, *chainAllocCounter.rlock());

  co_return dirty;
}
|
||||
|
||||
/**
 * Handle all create requests in [begin, end), which share one file name.
 *
 * If the entry already exists every request is treated as an open. Otherwise the
 * requests are tried in order until one creates the file; the remaining ones are
 * then treated as opens of the freshly created file.
 *
 * @return always false on success (the parent inode itself is not touched here).
 */
CoTryTask<bool> BatchedOp::create(IReadWriteTransaction &txn,
                                  const Inode &parent,
                                  folly::Synchronized<uint32_t> &chainAllocCounter,
                                  auto begin,
                                  auto end) {
  if (begin == end) {
    co_return false;
  }

  const auto &path = begin->get().req.path;
  XLOGF_IF(FATAL,
           (parent.id != inodeId_ || path.parent != inodeId_ || !path.path || path.path->has_parent_path()),
           "{}, {}, {}",
           parent.id,
           inodeId_,
           path);

  const auto &name = path.path->string();
  auto entry = co_await DirEntry::snapshotLoad(txn, inodeId_, name);
  CO_RETURN_ON_ERROR(entry);
  XLOGF(DBG, "entry {}/{} -> {}", inodeId_, name, OptionalFmt(*entry));

  // entry already there: nothing to create, just open it for everyone
  if (entry->has_value()) {
    auto existing = co_await entry->value().snapshotLoadInode(txn);
    CO_RETURN_ON_ERROR(existing);
    co_return (co_await openExists(txn, *existing, **entry, begin, end)).then([](auto &) { return false; });
  }

  for (auto iter = begin; iter != end; ++iter) {
    auto &waiter = iter->get();
    assert(!entry->has_value());

    auto created = co_await create(txn, parent, chainAllocCounter, waiter.req);
    CO_RETURN_ON_TXN_ERROR(created);
    if (created.hasError()) {
      // this request is rejected, let the next one try to create the file
      waiter.result = makeError(std::move(created.error()));
      continue;
    }

    auto &[newInode, newEntry] = *created;
    waiter.result = CreateRsp(newInode, false /* needTrunc */);
    waiter.newFile = true;
    // the file exists now, remaining requests open it
    co_return (co_await openExists(txn, newInode, newEntry, std::next(iter), end)).then([](auto &) { return false; });
  }

  co_return false;
}
|
||||
|
||||
/**
 * Create a new file (inode + directory entry) for one CreateReq.
 *
 * Checks the request, the parent's state/permission/lock, resolves the layout,
 * allocates an inode id, then stores the entry, the inode and (for non read-only
 * opens with a session) the file session in the transaction.
 *
 * @return the new inode and its directory entry.
 */
CoTryTask<std::pair<Inode, DirEntry>> BatchedOp::create(IReadWriteTransaction &txn,
                                                        const Inode &parent,
                                                        folly::Synchronized<uint32_t> &chainAllocCounter,
                                                        const CreateReq &req) {
  CO_RETURN_ON_ERROR(req.valid());
  auto parentId = inodeId_;
  auto parentAcl = parent.acl;
  const auto &name = req.path.path->string();

  // parent directory has been unlinked
  if (!parent.nlink) {
    co_return makeError(MetaCode::kNotFound, fmt::format("{}, Directory {} is removed", req.path, parentId));
  }

  CO_RETURN_ON_ERROR(req.path.validForCreate());
  CO_RETURN_ON_ERROR(parentAcl.checkPermission(req.user, AccessType::WRITE));
  CO_RETURN_ON_ERROR(parent.asDirectory().checkLock(req.client));

  // when the user doesn't specify a layout, inherit the parent directory's layout
  auto layout = req.layout;
  if (!layout.has_value()) {
    layout = parent.asDirectory().layout;
  }

  if (layout->empty()) {
    // no chains chosen yet, allocate them
    if (parent.acl.iflags & FS_CHAIN_ALLOCATION_FL) {
      CO_RETURN_ON_ERROR(co_await chainAlloc().allocateChainsForLayout(*layout, chainAllocCounter));
    } else {
      CO_RETURN_ON_ERROR(co_await chainAlloc().allocateChainsForLayout(*layout));
    }
  } else {
    CO_RETURN_ON_ERROR(co_await chainAlloc().checkLayoutValid(*layout));
  }

  auto newChunkEngine = config().enable_new_chunk_engine() || (parent.acl.iflags & FS_NEW_CHUNK_ENGINE);
  auto inodeId = co_await allocateInodeId(txn, newChunkEngine);
  CO_RETURN_ON_ERROR(inodeId);
  XLOGF_IF(FATAL,
           inodeId->useNewChunkEngine() != newChunkEngine,
           "InodeId {}, use new chunk engine {}",
           inodeId,
           newChunkEngine);

  auto entry = DirEntry::newFile(parentId, name, *inodeId);
  entry.uuid = req.uuid;
  auto inode = Inode::newFile(*inodeId,
                              Acl(req.user.uid, req.user.gid, meta::Permission(req.perm & ALLPERMS)),
                              std::move(*layout),
                              now());
  if (config().dynamic_stripe() && req.dynStripe) {
    inode.asFile().dynStripe = std::min(config().dynamic_stripe_initial(), inode.asFile().layout.stripeSize);
  }

  // S_ISGID on the parent directory selects BSD semantics: files created there inherit
  // their group ID from the directory instead of the creating process.
  if (parentAcl.perm & S_ISGID) {
    inode.acl.gid = parentAcl.gid;
  }

  // Read conflicts:
  //  - parent inode, so the parent can't be removed concurrently;
  //  - directory entry, so the same name can't be created concurrently.
  CO_RETURN_ON_ERROR(co_await Inode(parentId).addIntoReadConflict(txn));
  CO_RETURN_ON_ERROR(co_await entry.addIntoReadConflict(txn));

  // persist directory entry and inode
  CO_RETURN_ON_ERROR(co_await entry.store(txn));
  CO_RETURN_ON_ERROR(co_await inode.store(txn));

  // a writable open with a session also records the file session
  const bool writeSession = req.session && req.flags.accessType() != AccessType::READ;
  if (writeSession) {
    openWrite.addSample(1);
    CO_RETURN_ON_ERROR(co_await FileSession::create(inode.id, req.session.value()).store(txn));
  }

  co_return std::make_pair(inode, entry);
}
|
||||
|
||||
/**
 * Treat every create request in [begin, end) as an open of the existing `inode`.
 *
 * A request whose uuid matches the entry's uuid is answered as an already finished
 * create. Per-request errors only fail that waiter; transaction errors abort the batch.
 * The inode is stored when any open modified it.
 */
CoTryTask<void> BatchedOp::openExists(IReadWriteTransaction &txn,
                                      Inode &inode,
                                      const DirEntry &entry,
                                      auto begin,
                                      auto end) {
  bool inodeChanged = false;
  for (auto iter = begin; iter != end; ++iter) {
    auto &waiter = iter->get();
    auto &req = waiter.req;

    const bool sameCreate = entry.uuid != Uuid::zero() && entry.uuid == req.uuid;
    if (sameCreate) {
      // this may happen when FDB returns commit_unknown_result, or we failed to send response to client
      XLOGF(CRITICAL, "Create already finished, dst {}, req {}, uuid {}", entry, req, req.uuid);
      waiter.result = CreateRsp(inode, false /* trunc */);
      continue;
    }

    auto opened = co_await openExists(txn, inode, req);
    CO_RETURN_ON_TXN_ERROR(opened);
    if (opened.hasError()) {
      waiter.result = makeError(std::move(opened.error()));
      continue;
    }
    waiter.result = CreateRsp(inode, waiter.req.flags.contains(O_TRUNC) /* needTrunc */);
    inodeChanged |= *opened;
  }

  // write the inode back only if an open actually changed it
  if (inodeChanged) {
    CO_RETURN_ON_ERROR(co_await inode.addIntoReadConflict(txn));
    CO_RETURN_ON_ERROR(co_await inode.store(txn));
  }

  co_return Void{};
}
|
||||
|
||||
/**
 * Open an already existing inode on behalf of a CreateReq.
 *
 * Rejects symlinks, directories, O_EXCL, writes to immutable files, missing
 * permission and (optionally) read-only opens of files with holes. A write by a
 * non-owner clears the SUID/SGID/sticky bits; a writable open with a session may
 * reset the dynamic stripe and stores the file session.
 *
 * @return true when the inode was modified.
 */
CoTryTask<bool> BatchedOp::openExists(IReadWriteTransaction &txn, Inode &inode, const CreateReq &req) {
  CO_RETURN_ON_ERROR(req.valid());

  if (inode.isSymlink()) {
    // todo: rarely happens, how to handle this gracefully?
    auto msg = fmt::format("req {}, found symlink {}", req, inode);
    XLOG(WARN, msg);
    co_return makeError(MetaCode::kBusy, std::move(msg));
  }
  if (!inode.isFile()) {
    assert(inode.isDirectory());
    co_return makeError(MetaCode::kIsDirectory);
  }
  if (req.flags.contains(O_EXCL)) {
    co_return makeError(MetaCode::kExists);
  }

  // permission checks
  const bool rdonly = req.flags.accessType() == AccessType::READ;
  if (!rdonly && (inode.acl.iflags & FS_IMMUTABLE_FL)) {
    co_return makeError(MetaCode::kNoPermission, fmt::format("FS_IMMUTABLE_FL set on inode {}", inode.id));
  }
  CO_RETURN_ON_ERROR(inode.acl.checkPermission(req.user, req.flags.accessType()));

  // hole check
  if (rdonly && inode.asFile().hasHole() && config().check_file_hole()) {
    XLOGF(WARN, "Inode {} contains hole, don't allow O_RDONLY", inode.id);
    co_return makeError(MetaCode::kFileHasHole);
  }

  bool dirty = false;

  // clear SUID SGID sticky bits on write by non owner
  constexpr uint32_t sbits = S_ISUID | S_ISGID | S_ISVTX;
  static_assert(sbits == 07000);
  if (!rdonly && req.user.uid != inode.acl.uid && (inode.acl.perm & sbits)) {
    dirty |= SetAttr::update(inode.acl.perm, Permission(inode.acl.perm & (~sbits)));
  }

  if (req.session.has_value() && !rdonly) {
    // update dynamic stripe
    CO_RETURN_ON_ERROR(inode.acl.checkPermission(req.user, req.flags.accessType()));
    auto &file = inode.asFile();
    if (!req.dynStripe && file.dynStripe && file.dynStripe < file.layout.stripeSize) {
      dirty |= SetAttr::update(file.dynStripe, 0u);
    }

    // create session
    openWrite.addSample(1);
    CO_RETURN_ON_ERROR(co_await FileSession::create(inode.id, req.session.value()).store(txn));
  }

  co_return dirty;
}
|
||||
|
||||
void BatchedOp::retry(const Status &error) {
|
||||
Operation<Inode>::retry(error);
|
||||
for (auto &waiter : syncs_) {
|
||||
waiter.get().result = std::nullopt;
|
||||
}
|
||||
for (auto &waiter : closes_) {
|
||||
waiter.get().result = std::nullopt;
|
||||
}
|
||||
for (auto &waiter : setattrs_) {
|
||||
waiter.get().result = std::nullopt;
|
||||
}
|
||||
for (auto &waiter : creates_) {
|
||||
waiter.get().result = std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
/** BatchedOp::Waiter */
|
||||
template <>
|
||||
void BatchedOp::Waiter<CreateReq, CreateRsp>::finish(BatchedOp &op, const Result<Inode> &r) {
|
||||
SCOPE_EXIT { baton.post(); };
|
||||
if (r.hasError() && !result.has_value()) {
|
||||
result = makeError(r.error());
|
||||
return;
|
||||
}
|
||||
XLOGF_IF(FATAL, !result.has_value(), "req {}, no result", req);
|
||||
if (result->hasError()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto inode = result->value().stat;
|
||||
if (newFile) {
|
||||
op.addEvent(Event::Type::Create)
|
||||
.addField("parent", req.path.parent)
|
||||
.addField("name", req.path.path->string())
|
||||
.addField("inode", inode.id)
|
||||
.addField("user", req.user.uid)
|
||||
.addField("host", req.client.hostname)
|
||||
.addField("chain_table", inode.asFile().layout.tableId);
|
||||
}
|
||||
if (req.session && req.flags.accessType() != AccessType::READ) {
|
||||
if (req.flags.contains(O_TRUNC) && !newFile) {
|
||||
result.value()->needTruncate = true;
|
||||
}
|
||||
op.addEvent(Event::Type::OpenWrite)
|
||||
.addField("inode", inode.id)
|
||||
.addField("owner", inode.acl.uid)
|
||||
.addField("user", req.user.uid)
|
||||
.addField("host", req.client.hostname)
|
||||
.addField("length", inode.asFile().length)
|
||||
.addField("truncateVer", inode.asFile().truncateVer)
|
||||
.addField("dynStripe", inode.asFile().dynStripe)
|
||||
.addField("otrunc", req.flags.contains(O_TRUNC));
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void BatchedOp::Waiter<SyncReq, SyncRsp>::finish(BatchedOp &op, const Result<Inode> &r) {
|
||||
if (!result.has_value()) {
|
||||
result = r.then([](auto &inode) { return SyncRsp(inode); });
|
||||
}
|
||||
if (req.truncated && !hasError()) {
|
||||
auto &inode = result.value()->stat;
|
||||
XLOGF_IF(DFATAL, !inode.isFile(), "req {} success, but inode {} is not file", req, inode);
|
||||
if (inode.isFile()) {
|
||||
op.addEvent(Event::Type::Truncate)
|
||||
.addField("inode", inode.id)
|
||||
.addField("length", inode.asFile().length)
|
||||
.addField("truncateVer", inode.asFile().truncateVer)
|
||||
.addField("dynStripe", inode.asFile().dynStripe)
|
||||
.addField("user", req.user.uid)
|
||||
.addField("host", req.client.hostname);
|
||||
op.addTrace(MetaEventTrace{
|
||||
.eventType = Event::Type::Truncate,
|
||||
.inodeId = inode.id,
|
||||
.userId = req.user.uid,
|
||||
.client = req.client,
|
||||
.length = inode.asFile().length,
|
||||
.truncateVer = inode.asFile().truncateVer,
|
||||
.dynStripe = inode.asFile().dynStripe,
|
||||
});
|
||||
}
|
||||
}
|
||||
baton.post();
|
||||
}
|
||||
|
||||
template <>
|
||||
void BatchedOp::Waiter<CloseReq, CloseRsp>::finish(BatchedOp &op, const Result<Inode> &r) {
|
||||
if (!result.has_value()) {
|
||||
result = r.then([](auto &inode) { return CloseRsp(inode); });
|
||||
}
|
||||
if (req.session && !hasError()) {
|
||||
auto &inode = result.value()->stat;
|
||||
XLOGF_IF(DFATAL, !inode.isFile(), "req {} success, but inode {} is not file", req, inode);
|
||||
if (inode.isFile()) {
|
||||
op.addEvent(Event::Type::CloseWrite)
|
||||
.addField("inode", inode.id.toHexString())
|
||||
.addField("owner", inode.acl.uid)
|
||||
.addField("user", req.user.uid)
|
||||
.addField("host", req.client.hostname)
|
||||
.addField("length", inode.asFile().length)
|
||||
.addField("truncateVer", inode.asFile().truncateVer)
|
||||
.addField("dynStripe", inode.asFile().dynStripe)
|
||||
.addField("prune", req.pruneSession);
|
||||
op.addTrace(MetaEventTrace{
|
||||
.eventType = Event::Type::CloseWrite,
|
||||
.inodeId = inode.id,
|
||||
.ownerId = inode.acl.uid,
|
||||
.userId = req.user.uid,
|
||||
.client = req.client,
|
||||
.length = inode.asFile().length,
|
||||
.truncateVer = inode.asFile().truncateVer,
|
||||
.dynStripe = inode.asFile().dynStripe,
|
||||
.pruneSession = req.pruneSession,
|
||||
});
|
||||
}
|
||||
}
|
||||
baton.post();
|
||||
}
|
||||
|
||||
template <>
|
||||
void BatchedOp::Waiter<SetAttrReq, SetAttrRsp>::finish(BatchedOp &, const Result<Inode> &r) {
|
||||
if (!result.has_value()) {
|
||||
result = r.then([](auto &inode) { return SetAttrRsp(inode); });
|
||||
}
|
||||
baton.post();
|
||||
}
|
||||
|
||||
void BatchedOp::finish(const Result<Inode> &result) {
|
||||
batchCnt.addSample(syncs_.size() + closes_.size() + setattrs_.size());
|
||||
for (auto &waiter : syncs_) {
|
||||
waiter.get().finish(*this, result);
|
||||
}
|
||||
for (auto &waiter : closes_) {
|
||||
waiter.get().finish(*this, result);
|
||||
}
|
||||
for (auto &waiter : setattrs_) {
|
||||
waiter.get().finish(*this, result);
|
||||
}
|
||||
for (auto &waiter : creates_) {
|
||||
waiter.get().finish(*this, result);
|
||||
}
|
||||
Operation<Inode>::finish(result);
|
||||
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
177
src/meta/store/ops/BatchOperation.h
Normal file
177
src/meta/store/ops/BatchOperation.h
Normal file
@@ -0,0 +1,177 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <folly/ScopeGuard.h>
|
||||
#include <folly/Synchronized.h>
|
||||
#include <folly/Utility.h>
|
||||
#include <folly/experimental/coro/Baton.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/Shards.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/core/user/User.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class MetaOperator;
|
||||
|
||||
/**
 * An operation that batches several requests targeting the same inode (sync, close,
 * setattr on the inode itself, create under it) into a single run over one transaction.
 *
 * Callers register a `Waiter` per request with `add()`, and read the per-request
 * result from the waiter after `finish()` has posted its baton.
 */
class BatchedOp : public Operation<Inode> {
 public:
  /**
   * One pending request together with the slot its response is delivered to.
   * Waiters are referenced (not owned) by the BatchedOp.
   */
  template <typename Req, typename Rsp>
  struct Waiter : folly::NonCopyableNonMovable {
    Req req;
    std::optional<Result<Rsp>> result;  // empty until the request has been answered
    folly::coro::Baton baton;           // posted by finish()
    bool newFile = false;               /* for Create operation */

    Waiter(Req req)
        : req(std::move(req)) {}

    /** Return a copy of the result; it is a fatal error to call this before a result is set. */
    Result<Rsp> getResult() {
      XLOGF_IF(FATAL, !result.has_value(), "result not set");
      return *result;
    }

    /** True only when a result is present and it is an error. */
    bool hasError() const { return result && result->hasError(); }

    /** Specialized per request type in the .cc file. */
    void finish(BatchedOp &op, const Result<Inode> &r);
  };

  BatchedOp(MetaStore &meta, InodeId inodeId)
      : Operation(meta),
        inodeId_(inodeId) {}

  std::string_view name() const override { return "batchedOp"; }

  flat::Uid user() const override { return user_; }

  /** Register a waiter; each specialization checks that the request belongs to this batch. */
  template <typename Req, typename Rsp>
  void add(Waiter<Req, Rsp> &waiter);

  template <>
  void add(Waiter<SyncReq, SyncRsp> &waiter) {
    XLOGF_IF(FATAL, waiter.req.inode != inodeId_, "{} != {}", waiter.req.inode, inodeId_);
    addReq(syncs_, waiter);
  }

  template <>
  void add(Waiter<CloseReq, CloseRsp> &waiter) {
    XLOGF_IF(FATAL, waiter.req.inode != inodeId_, "{} != {}", waiter.req.inode, inodeId_);
    addReq(closes_, waiter);
  }

  template <>
  void add(Waiter<SetAttrReq, SetAttrRsp> &waiter) {
    XLOGF_IF(FATAL, waiter.req.path != PathAt(inodeId_), "{} != {}", waiter.req.path, PathAt(inodeId_));
    addReq(setattrs_, waiter);
  }

  template <>
  void add(Waiter<CreateReq, CreateRsp> &waiter) {
    // a create must name a direct child of this batch's inode
    XLOGF_IF(FATAL,
             (waiter.req.path.parent != inodeId_ || !waiter.req.path.path || waiter.req.path.path->has_parent_path()),
             "path {}, inodeId {}",
             waiter.req.path,
             inodeId_);
    addReq(creates_, waiter);
  }

  CoTryTask<Inode> run(IReadWriteTransaction &txn) override;

  void retry(const Status &error) override;

  void finish(const Result<Inode> &result) override;

  /** Number of requests registered through add(). */
  size_t numReqs() const { return numReqs_; }

  // for test
  static CoTryTask<CreateRsp> create(MetaStore &store, IReadWriteTransaction &txn, CreateReq req) {
    Waiter<CreateReq, CreateRsp> waiter(req);
    BatchedOp op(store, req.path.parent);
    op.add(waiter);
    op.finish(co_await op.run(txn));
    co_return waiter.getResult();
  }

 private:
  friend class MetaOperator;

  template <typename Req, typename Rsp>
  using WaiterRef = std::reference_wrapper<Waiter<Req, Rsp>>;

  /** Append the waiter to `reqs`; the batch adopts the uid of the first request seen. */
  void addReq(auto &reqs, auto &waiter) {
    if (!user_) {
      user_ = waiter.req.user.uid;
    }
    reqs.emplace_back(waiter);
    ++numReqs_;
  }

  CoTryTask<bool> setAttr(IReadWriteTransaction &txn, Inode &inode);

  CoTryTask<bool> syncAndClose(IReadWriteTransaction &txn, Inode &inode);

  CoTryTask<bool> sync(Inode &inode,
                       const SyncReq &req,
                       bool &updateLength,
                       bool &truncate,
                       std::optional<VersionedLength> &hintLength);

  CoTryTask<bool> close(Inode &inode,
                        const CloseReq &req,
                        bool &updateLength,
                        std::optional<VersionedLength> &hintLength,
                        std::vector<FileSession> &sessions);

  CoTryTask<bool> create(IReadWriteTransaction &txn, Inode &inode);

  CoTryTask<bool> create(IReadWriteTransaction &txn,
                         const Inode &parent,
                         folly::Synchronized<uint32_t> &chainAllocCounter,
                         auto begin,
                         auto end);

  CoTryTask<std::pair<Inode, DirEntry>> create(IReadWriteTransaction &txn,
                                               const Inode &parent,
                                               folly::Synchronized<uint32_t> &chainAllocCounter,
                                               const CreateReq &req);

  CoTryTask<void> openExists(IReadWriteTransaction &txn, Inode &inode, const DirEntry &entry, auto begin, auto end);

  CoTryTask<bool> openExists(IReadWriteTransaction &txn, Inode &inode, const CreateReq &req);

  CoTryTask<VersionedLength> queryLength(const Inode &inode, std::optional<VersionedLength> hintLength, bool truncate);

  // requests
  InodeId inodeId_;
  flat::Uid user_;  // use first uid
  std::vector<WaiterRef<SetAttrReq, SetAttrRsp>> setattrs_;
  std::vector<WaiterRef<SyncReq, SyncRsp>> syncs_;
  std::vector<WaiterRef<CloseReq, CloseRsp>> closes_;
  std::vector<WaiterRef<CreateReq, CreateRsp>> creates_;
  size_t numReqs_ = 0;

  // state
  std::optional<kv::Versionstamp> versionstamp_;
  std::optional<VersionedLength> currLength_;
  std::optional<VersionedLength> nextLength_;
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
104
src/meta/store/ops/GetRealPath.cc
Normal file
104
src/meta/store/ops/GetRealPath.cc
Normal file
@@ -0,0 +1,104 @@
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
#include <stack>
|
||||
#include <vector>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/**
 * Lexically normalize a path: drop "." components, resolve ".." against the
 * preceding component where possible, and restart from the root on "/".
 * Leading ".." components are kept for relative paths and dropped for absolute ones.
 * An empty result is returned as ".".
 */
static Path simplifyPath(const Path &path) {
  std::vector<Path> kept;
  bool absolute = false;

  for (auto &part : path) {
    if (part == ".") {
      continue;
    }
    if (part == "/") {
      absolute = true;
      kept.clear();
      continue;
    }
    if (part != "..") {
      kept.push_back(part);
      continue;
    }

    // ".." : cancel the previous real component, otherwise keep it for relative paths
    const bool canPop = !kept.empty() && kept.back() != "..";
    if (canPop) {
      kept.pop_back();
    } else if (!absolute) {
      kept.push_back(part);
    }
  }

  Path p = absolute ? "/" : "";
  for (auto &part : kept) {
    p = p / part;
  }

  XLOGF(DBG, "before {}, after {}", path, p);
  if (p.empty()) {
    return ".";
  }
  return p;
}
|
||||
|
||||
/** MetaStore::getRealPath */
|
||||
/**
 * Read-only operation resolving the real path of a request target.
 *
 * With a path given and `absolute` unset, the simplified resolution trace is returned.
 * Otherwise the absolute path is rebuilt by walking directory entries up to the tree root.
 */
class GetRealPathOp : public ReadOnlyOperation<GetRealPathRsp> {
 public:
  GetRealPathOp(MetaStore &meta, const GetRealPathReq &req)
      : ReadOnlyOperation<GetRealPathRsp>(meta),
        req_(req) {}

  OPERATION_TAGS(req_);

  CoTryTask<GetRealPathRsp> run(IReadOnlyTransaction &txn) override {
    XLOGF(DBG, "GetRealPathOp: {}", req_);

    CHECK_REQUEST(req_);

    // find the directory entry to start walking up from
    DirEntry entry;
    if (!req_.path.path.has_value()) {
      // only an inode is given: it must be a directory so its entry can be located
      auto inode = (co_await Inode::snapshotLoad(txn, req_.path.parent)).then(checkMetaFound<Inode>);
      CO_RETURN_ON_ERROR(inode);
      if (!inode->isDirectory()) {
        co_return makeError(MetaCode::kNotDirectory, "Only support get absolute path of directory");
      }
      if (UNLIKELY(inode->id.isTreeRoot())) {
        co_return GetRealPathRsp("/");
      }

      auto loaded = co_await inode->snapshotLoadDirEntry(txn);
      CO_RETURN_ON_ERROR(loaded);
      entry = std::move(*loaded);
    } else {
      Path trace;
      auto resolved = co_await resolve(txn, req_.user, &trace).dirEntry(req_.path, AtFlags(AT_SYMLINK_FOLLOW));
      CO_RETURN_ON_ERROR(resolved);
      if (!req_.absolute) {
        co_return GetRealPathRsp(simplifyPath(trace));
      }
      entry = std::move(*resolved);
    }

    // prepend parent names until the tree root is reached
    Path path = entry.name;
    while (!entry.parent.isTreeRoot()) {
      auto parent = (co_await Inode::snapshotLoad(txn, entry.parent)).then(checkMetaFound<Inode>);
      CO_RETURN_ON_ERROR(parent);
      auto parentEntry = co_await parent->snapshotLoadDirEntry(txn);
      CO_RETURN_ON_ERROR(parentEntry);
      entry = std::move(*parentEntry);
      XLOGF(DBG, "get {}", entry.name);
      path = entry.name / path;
    }

    co_return simplifyPath("/" / path);
  }

 private:
  const GetRealPathReq &req_;
};
|
||||
|
||||
/**
 * Creates the operation object that serves a getRealPath request.
 * GetRealPathOp stores `req` by reference, so `req` must stay alive until the returned operation is done.
 */
MetaStore::OpPtr<GetRealPathRsp> MetaStore::getRealPath(const GetRealPathReq &req) {
|
||||
return std::make_unique<GetRealPathOp>(*this, req);
|
||||
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
131
src/meta/store/ops/HardLink.cc
Normal file
131
src/meta/store/ops/HardLink.cc
Normal file
@@ -0,0 +1,131 @@
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <fcntl.h>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <limits>
|
||||
#include <linux/fs.h>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
#include "meta/store/ops/SetAttr.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/**
 * MetaStore::hardLink
 *
 * Read-write operation that creates a new directory entry (`req.newPath`) pointing at the inode
 * that `req.oldPath` resolves to, and bumps that inode's link count.
 *
 * Outcomes visible in this class:
 * - destination entry already exists with the same non-zero request uuid: treated as a replay of
 *   an already-committed request and answered with the existing inode;
 * - destination entry already exists otherwise: MetaCode::kExists;
 * - target is a directory: MetaCode::kIsDirectory;
 * - target has FS_IMMUTABLE_FL set, or its nlink is already uint16_t::max: MetaCode::kNoPermission.
 *
 * The operation keeps a reference to the request; the request must outlive the operation.
 */
class HardLinkOp : public Operation<HardLinkRsp> {
 public:
  HardLinkOp(MetaStore &meta, const HardLinkReq &req)
      : Operation<HardLinkRsp>(meta),
        req_(req) {}

  OPERATION_TAGS(req_);

  /**
   * @param txn read-write transaction; the new entry and the updated inode are stored through it.
   * @return the updated (or, on replay, the existing) inode, or the first error encountered.
   */
  CoTryTask<HardLinkRsp> run(IReadWriteTransaction &txn) override {
    XLOGF(DBG, "HardLinkOp: {}", req_);

    CHECK_REQUEST(req_);
    // check name valid
    CO_RETURN_ON_ERROR(req_.newPath.validForCreate());

    auto resolveResult = co_await resolve(txn, req_.user).path(req_.newPath, req_.flags);
    CO_RETURN_ON_ERROR(resolveResult);
    if (resolveResult->dirEntry.has_value()) {
      auto &entry = *resolveResult->dirEntry;
      if (entry.uuid != Uuid::zero() && entry.uuid == req_.uuid) {
        // this may happen when FDB returns commit_unknown_result, or we failed to send response to client
        XLOGF(CRITICAL, "HardLink already created, dst {}, req {}, uuid {}", entry, req_, req_.uuid);
        auto inode = co_await entry.snapshotLoadInode(txn);
        CO_RETURN_ON_ERROR(inode);
        co_return HardLinkRsp(std::move(*inode));
      }
      // The second argument is the existing directory entry, so label it as such
      // (it used to be mislabelled "uuid").
      co_return makeError(MetaCode::kExists, fmt::format("hardlink exists, req {}, entry {}", req_, entry));
    }

    auto target = co_await resolve(txn, req_.user).inode(req_.oldPath, req_.flags, true /* checkRefCnt */);
    CO_RETURN_ON_ERROR(target);
    auto &inode = *target;

    // check permission and lock
    auto parent = co_await resolveResult->getParentInode(txn);
    CO_RETURN_ON_ERROR(parent);
    CO_RETURN_ON_ERROR(parent->acl.checkPermission(req_.user, AccessType::WRITE));
    CO_RETURN_ON_ERROR(parent->asDirectory().checkLock(req_.client));
    if (inode.acl.iflags & FS_IMMUTABLE_FL) {
      co_return makeError(MetaCode::kNoPermission, fmt::format("FS_IMMUTABLE_FL set on target inode {}", inode.id));
    }

    assert(inode.nlink);

    // Build the new entry with the same type as the target; directories cannot be hard linked.
    InodeId parentId = resolveResult->getParentId();
    DirEntry entry;
    switch (inode.getType()) {
      case InodeType::File:
        entry = DirEntry::newFile(parentId, req_.newPath.path->filename().native(), inode.id);
        break;
      case InodeType::Directory:
        co_return makeError(MetaCode::kIsDirectory);
      case InodeType::Symlink:
        entry = DirEntry::newSymlink(parentId, req_.newPath.path->filename().native(), inode.id);
        break;
      default:
        XLOGF(FATAL, "Found invalid inode type {}", (int)inode.getType());
    }

    // Remember the request uuid on the entry so a replayed request can be recognised above.
    entry.uuid = req_.uuid;

    // Refuse to let the increment below wrap around the uint16_t limit.
    if (inode.nlink == std::numeric_limits<uint16_t>::max()) {
      XLOGF(ERR, "Inode {} has {} links, can't add more hard link!", inode.id, inode.nlink);
      co_return makeError(MetaCode::kNoPermission, "nlink == uint16_t::max");
    }

    // NOTE: create dirEntry, add parent inode and dirEntry into read conflict set.
    // add parent inode into read conflict set to prevent parent is removed concurrently
    CO_RETURN_ON_ERROR(co_await Inode(parentId).addIntoReadConflict(txn));
    // add directory entry into read conflict set to prevent concurrent create
    CO_RETURN_ON_ERROR(co_await entry.addIntoReadConflict(txn));
    CO_RETURN_ON_ERROR(co_await entry.store(txn));

    // NOTE: add link count of inode, add inode into read conflict set since this is a read modify write
    assert(!inode.isDirectory());
    inode.nlink++;
    SetAttr::update(inode.ctime, UtcClock::now(), config().time_granularity(), true);
    CO_RETURN_ON_ERROR(co_await inode.addIntoReadConflict(txn));
    CO_RETURN_ON_ERROR(co_await inode.store(txn));

    addEvent(Event::Type::HardLink)
        .addField("parent", entry.parent)
        .addField("name", entry.name)
        .addField("inode", inode.id)
        .addField("owner", inode.acl.uid)
        .addField("user", req_.user.uid)
        .addField("host", req_.client.hostname)
        .addField("nlink", inode.nlink);
    addTrace(MetaEventTrace{
        .eventType = Event::Type::HardLink,
        .inodeId = inode.id,
        .parentId = entry.parent,
        .entryName = entry.name,
        .ownerId = inode.acl.uid,
        .userId = req_.user.uid,
        .client = req_.client,
        .nlink = inode.nlink,
    });
    co_return HardLinkRsp(std::move(inode));
  }

 private:
  const HardLinkReq &req_;
};
|
||||
|
||||
/**
 * Creates the operation object that serves a hardLink request.
 * HardLinkOp stores `req` by reference, so `req` must stay alive until the returned operation is done.
 */
MetaStore::OpPtr<HardLinkRsp> MetaStore::hardLink(const HardLinkReq &req) {
|
||||
return std::make_unique<HardLinkOp>(*this, req);
|
||||
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
68
src/meta/store/ops/List.cc
Normal file
68
src/meta/store/ops/List.cc
Normal file
@@ -0,0 +1,68 @@
|
||||
#include <fcntl.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/monitor/Sample.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
// File-local recorder; ListOp feeds it the number of entries returned by each list call,
// tagged with the requesting uid.
// NOTE(review): this metric name uses ':' as separator while "meta_server.open_write" in Open.cc
// uses '.'; confirm whether the difference is intentional before renaming anything.
namespace {
|
||||
monitor::DistributionRecorder listDist("meta_server:list_entries");
|
||||
}
|
||||
|
||||
/** MetaStore::list */
|
||||
class ListOp : public ReadOnlyOperation<ListRsp> {
|
||||
public:
|
||||
ListOp(MetaStore &meta, const ListReq &req)
|
||||
: ReadOnlyOperation<ListRsp>(meta),
|
||||
req_(req) {}
|
||||
|
||||
OPERATION_TAGS(req_);
|
||||
|
||||
CoTryTask<ListRsp> run(IReadOnlyTransaction &txn) override {
|
||||
XLOGF(DBG, "ListOp: {}", req_);
|
||||
|
||||
CHECK_REQUEST(req_);
|
||||
|
||||
auto result =
|
||||
co_await resolve(txn, req_.user).inode(req_.path, AtFlags(AtFlags(AT_SYMLINK_FOLLOW)), true /* checkRefCnt */);
|
||||
CO_RETURN_ON_ERROR(result);
|
||||
|
||||
auto &inode = result.value();
|
||||
if (!inode.isDirectory()) {
|
||||
co_return makeError(MetaCode::kNotDirectory);
|
||||
}
|
||||
if (!inode.acl.checkPermission(req_.user, AccessType::READ)) {
|
||||
co_return makeError(MetaCode::kNoPermission);
|
||||
}
|
||||
|
||||
auto list = co_await DirEntryList::snapshotLoad(txn,
|
||||
inode.id,
|
||||
req_.prev,
|
||||
req_.limit > 0 ? req_.limit : config().list_default_limit(),
|
||||
req_.status,
|
||||
config().batch_stat_concurrent());
|
||||
CO_RETURN_ON_ERROR(list);
|
||||
|
||||
listDist.addSample(list->entries.size(), {{"uid", folly::to<std::string>(req_.user.uid)}});
|
||||
|
||||
co_return ListRsp(std::move(*list));
|
||||
}
|
||||
|
||||
private:
|
||||
const ListReq &req_;
|
||||
};
|
||||
|
||||
/**
 * Creates the operation object that serves a list request.
 * ListOp stores `req` by reference, so `req` must stay alive until the returned operation is done.
 */
MetaStore::OpPtr<ListRsp> MetaStore::list(const ListReq &req) { return std::make_unique<ListOp>(*this, req); }
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
69
src/meta/store/ops/LockDirectory.cc
Normal file
69
src/meta/store/ops/LockDirectory.cc
Normal file
@@ -0,0 +1,69 @@
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
/**
 * MetaStore::lockDirectory
 *
 * Read-write operation that sets or clears the lock stored on a directory inode.
 * The caller needs WRITE permission on the inode, and the inode must be a directory.
 *
 * Actions:
 * - TryLock:     fails with kNoLock if a different client (by uuid) holds the lock, otherwise
 *                behaves like PreemptLock;
 * - PreemptLock: installs a lock for the requesting client unless an equal lock is already stored;
 * - UnLock:      fails with kNoLock if there is no lock or another client holds it, otherwise
 *                behaves like Clear;
 * - Clear:       removes the lock if one is present.
 *
 * The inode is only written back when the stored lock actually changes.
 * The operation keeps a reference to the request; the request must outlive the operation.
 */
class LockDirectoryOp : public Operation<LockDirectoryRsp> {
 public:
  LockDirectoryOp(MetaStore &meta, const LockDirectoryReq &req)
      : Operation<LockDirectoryRsp>(meta),
        req_(req) {}

  /**
   * @param txn read-write transaction used to load and, if needed, store the directory inode.
   * @return an empty response on success, or the first error encountered.
   */
  CoTryTask<LockDirectoryRsp> run(IReadWriteTransaction &txn) override {
    CO_RETURN_ON_ERROR(req_.valid());

    auto inode = (co_await Inode::load(txn, req_.inode)).then(checkMetaFound<Inode>);
    CO_RETURN_ON_ERROR(inode);
    CO_RETURN_ON_ERROR(inode->acl.checkPermission(req_.user, AccessType::WRITE));

    if (!inode->isDirectory()) {
      co_return makeError(MetaCode::kNotDirectory, fmt::format("{} is not directory", inode->id));
    }

    switch (req_.action) {
      case LockDirectoryReq::LockAction::TryLock:
        if (inode->asDirectory().lock && inode->asDirectory().lock->client.uuid != req_.client.uuid) {
          co_return makeError(MetaCode::kNoLock, fmt::format("lock hold by {}", *inode->asDirectory().lock));
        }
        // Not held by anyone else: take the lock exactly like PreemptLock does.
        [[fallthrough]];
      case LockDirectoryReq::LockAction::PreemptLock:
        if (auto lock = Directory::Lock{req_.client}; inode->asDirectory().lock != lock) {
          inode->asDirectory().lock = lock;
          CO_RETURN_ON_ERROR(co_await inode->store(txn));
        }
        co_return LockDirectoryRsp();
      case LockDirectoryReq::LockAction::UnLock:
        if (!inode->asDirectory().lock) {
          co_return makeError(MetaCode::kNoLock, "locked not owned");
        }
        if (inode->asDirectory().lock->client.uuid != req_.client.uuid) {
          co_return makeError(MetaCode::kNoLock, fmt::format("lock hold by {}", *inode->asDirectory().lock));
        }
        // The requesting client owns the lock: release it exactly like Clear does.
        [[fallthrough]];
      case LockDirectoryReq::LockAction::Clear:
        if (inode->asDirectory().lock) {
          inode->asDirectory().lock = std::nullopt;
          CO_RETURN_ON_ERROR(co_await inode->store(txn));
        }
        co_return LockDirectoryRsp();
      default:
        XLOGF(DFATAL, "invalid action {}", (int)req_.action);
        co_return makeError(MetaCode::kFoundBug);
    }
  }

 private:
  const LockDirectoryReq &req_;
};
|
||||
|
||||
/**
 * Creates the operation object that serves a lockDirectory request.
 * LockDirectoryOp stores `req` by reference, so `req` must stay alive until the returned operation is done.
 */
MetaStore::OpPtr<LockDirectoryRsp> MetaStore::lockDirectory(const LockDirectoryReq &req) {
|
||||
return std::make_unique<LockDirectoryOp>(*this, req);
|
||||
}
|
||||
} // namespace hf3fs::meta::server
|
||||
136
src/meta/store/ops/Mkdirs.cc
Normal file
136
src/meta/store/ops/Mkdirs.cc
Normal file
@@ -0,0 +1,136 @@
|
||||
#include <folly/ScopeGuard.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "common/utils/FaultInjection.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/Uuid.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/** MetaStore::mkdirs */
|
||||
class MkdirsOp : public Operation<MkdirsRsp> {
|
||||
public:
|
||||
MkdirsOp(MetaStore &meta, const MkdirsReq &req)
|
||||
: Operation<MkdirsRsp>(meta),
|
||||
req_(req) {}
|
||||
|
||||
OPERATION_TAGS(req_);
|
||||
|
||||
CoTryTask<MkdirsRsp> run(IReadWriteTransaction &txn) override {
|
||||
XLOGF(DBG, "MkdirsOp: {}", req_);
|
||||
|
||||
CHECK_REQUEST(req_);
|
||||
|
||||
auto resolveResult = co_await resolve(txn, req_.user).pathRange(req_.path);
|
||||
CO_RETURN_ON_ERROR(resolveResult);
|
||||
auto curr = resolveResult->missing.begin();
|
||||
const auto end = resolveResult->missing.end();
|
||||
|
||||
if (curr == end) {
|
||||
assert(resolveResult->dirEntry.has_value());
|
||||
if (resolveResult->dirEntry->uuid != Uuid::zero() && resolveResult->dirEntry->uuid == req_.uuid) {
|
||||
// this may happens when FDB returns commit_unknown_result, or we failed to send response to client
|
||||
XLOGF(CRITICAL, "Mkdirs already finished, dst {}, req {}, uuid {}", *resolveResult->dirEntry, req_, req_.uuid);
|
||||
auto inode = co_await resolveResult->dirEntry->snapshotLoadInode(txn);
|
||||
CO_RETURN_ON_ERROR(inode);
|
||||
co_return MkdirsRsp(std::move(*inode));
|
||||
}
|
||||
co_return makeError(MetaCode::kExists);
|
||||
}
|
||||
|
||||
if (std::distance(curr, end) != 1 && !req_.recursive) {
|
||||
// some middle path components are missing and not recursive mkdirs
|
||||
co_return makeError(MetaCode::kNotFound);
|
||||
}
|
||||
|
||||
auto parent = co_await resolveResult->getParentInode(txn);
|
||||
CO_RETURN_ON_ERROR(parent);
|
||||
CO_RETURN_ON_ERROR(parent->acl.checkPermission(req_.user, AccessType::WRITE));
|
||||
CO_RETURN_ON_ERROR(parent->asDirectory().checkLock(req_.client));
|
||||
|
||||
auto layout = req_.layout;
|
||||
if (!layout.has_value()) {
|
||||
// user doesn't specific layout, inherit parent directory's layout.
|
||||
layout = parent->asDirectory().layout;
|
||||
}
|
||||
|
||||
InodeId parentId = resolveResult->getParentId();
|
||||
// NOTE: add parent inode and dirEntry into read conflict set.
|
||||
// add parent inode into read conflict set to prevent parent is removed concurrently
|
||||
CO_RETURN_ON_ERROR(co_await Inode(parentId).addIntoReadConflict(txn));
|
||||
// add directory entry into read conflict set to prevent concurrent create
|
||||
CO_RETURN_ON_ERROR(co_await DirEntry(parentId, curr->native()).addIntoReadConflict(txn));
|
||||
|
||||
auto acl = Acl(req_.user.uid,
|
||||
req_.user.gid,
|
||||
Permission(req_.perm & ALLPERMS),
|
||||
IFlags(parent->acl.iflags & FS_FL_INHERITABLE));
|
||||
if (parent->acl.perm & S_ISGID) {
|
||||
// The set-group-ID bit (S_ISGID) has several special uses.
|
||||
// For a directory, it indicates that BSD semantics are to be used for that directory:
|
||||
// files created there inherit their group ID from the directory, not from the effective group ID of the creating
|
||||
// process, and directories created there will also get the S_ISGID bit set
|
||||
acl.gid = parent->acl.gid;
|
||||
acl.perm = Permission(acl.perm | S_ISGID);
|
||||
}
|
||||
|
||||
// create all path components
|
||||
FAULT_INJECTION_SET_FACTOR(std::distance(curr, end));
|
||||
while (true) {
|
||||
if (!curr->has_filename() || curr->filename_is_dot() || curr->filename_is_dot_dot()) {
|
||||
co_return makeError(StatusCode::kInvalidArg, "filename is '.' or '..'");
|
||||
}
|
||||
|
||||
auto inodeId = co_await allocateInodeId(txn, false);
|
||||
CO_RETURN_ON_ERROR(inodeId);
|
||||
|
||||
CO_RETURN_ON_ERROR(co_await chainAlloc().checkLayoutValid(layout.value()));
|
||||
|
||||
Inode inode = Inode::newDirectory(*inodeId, parentId, curr->filename().native(), acl, *layout, now());
|
||||
DirEntry entry = DirEntry::newDirectory(parentId, curr->native(), *inodeId, acl);
|
||||
entry.uuid = req_.uuid;
|
||||
|
||||
// create inode and dirEntry
|
||||
CO_RETURN_ON_ERROR(co_await entry.store(txn));
|
||||
CO_RETURN_ON_ERROR(co_await inode.store(txn));
|
||||
|
||||
addEvent(Event::Type::Mkdir)
|
||||
.addField("parent", entry.parent)
|
||||
.addField("name", entry.name)
|
||||
.addField("inode", inode.id)
|
||||
.addField("user", inode.acl.uid)
|
||||
.addField("host", req_.client.hostname)
|
||||
.addField("chain_table", inode.asDirectory().layout.tableId);
|
||||
addTrace(MetaEventTrace{
|
||||
.eventType = Event::Type::Mkdir,
|
||||
.inodeId = inode.id,
|
||||
.parentId = entry.parent,
|
||||
.entryName = entry.name,
|
||||
.userId = inode.acl.uid,
|
||||
.client = req_.client,
|
||||
.tableId = inode.asDirectory().layout.tableId,
|
||||
});
|
||||
|
||||
curr++;
|
||||
parentId = *inodeId;
|
||||
if (curr == end) {
|
||||
co_return MkdirsRsp(std::move(inode));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const MkdirsReq &req_;
|
||||
};
|
||||
|
||||
/**
 * Creates the operation object that serves a mkdirs request.
 * MkdirsOp stores `req` by reference, so `req` must stay alive until the returned operation is done.
 */
MetaStore::OpPtr<MkdirsRsp> MetaStore::mkdirs(const MkdirsReq &req) { return std::make_unique<MkdirsOp>(*this, req); }
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
308
src/meta/store/ops/Open.cc
Normal file
308
src/meta/store/ops/Open.cc
Normal file
@@ -0,0 +1,308 @@
|
||||
#include <cassert>
|
||||
#include <fcntl.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "meta/components/GcManager.h"
|
||||
#include "meta/components/SessionManager.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
#include "meta/store/PathResolve.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
// BEGIN_WRITE(): guard placed in front of code that is about to modify the transaction.
// Must be expanded inside a coroutine member function that has `this->isReadOnly()`, a request
// member `req_` and a transaction named `txn` in scope.
// - If the operation declared itself read-only, logs at DFATAL and co_returns MetaCode::kFoundBug.
// - Otherwise introduces the local `rwTxn`, a reference to `txn` dynamic_cast to
//   IReadWriteTransaction, for the statements that follow in the same scope.
#define BEGIN_WRITE() \
|
||||
if (this->isReadOnly()) { \
|
||||
auto msg = fmt::format("Op {}{} shouldn't be readonly!", MetaSerde<>::getRpcName(req_), req_); \
|
||||
XLOG(DFATAL, msg); \
|
||||
co_return makeError(MetaCode::kFoundBug, std::move(msg)); \
|
||||
} \
|
||||
auto &rwTxn = dynamic_cast<IReadWriteTransaction &>(txn);
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
// Counter bumped once by OpenOp::createSession for every file session it is about to store.
// NOTE(review): declared at namespace scope without `static` / an anonymous namespace, so it has
// external linkage — confirm no other translation unit defines the same name.
monitor::CountRecorder openWrite("meta_server.open_write");
|
||||
|
||||
/** MetaStore::open */
|
||||
template <typename Req, typename Rsp>
|
||||
class OpenOp : public Operation<Rsp> {
|
||||
public:
|
||||
OpenOp(MetaStore &meta, Req &req)
|
||||
: Operation<Rsp>(meta),
|
||||
req_(req) {}
|
||||
|
||||
OPERATION_TAGS(req_);
|
||||
|
||||
bool isReadOnly() final {
|
||||
return !req_.session.has_value() && req_.flags.accessType() == AccessType::READ && !req_.flags.contains(O_TRUNC) &&
|
||||
!req_.flags.contains(O_CREAT);
|
||||
}
|
||||
|
||||
CoTryTask<Rsp> run(IReadWriteTransaction &txn) final {
|
||||
XLOGF(DBG, "OpenOp: {}", req_);
|
||||
|
||||
CHECK_REQUEST(req_);
|
||||
|
||||
if (!req_.path.path.has_value()) {
|
||||
// open by inodeId
|
||||
auto inode =
|
||||
co_await this->resolve(txn, req_.user)
|
||||
.inode(req_.path, AtFlags(AT_SYMLINK_FOLLOW) /* open/create follow symlink*/, true /* checkRefCnt */);
|
||||
CO_RETURN_ON_ERROR(inode);
|
||||
co_return co_await openExists(txn, std::nullopt, std::move(*inode), this->config().check_file_hole());
|
||||
} else {
|
||||
// open by path, can handle O_TRUNC by replace inode here.
|
||||
auto resolveResult = co_await this->resolve(txn, req_.user).path(req_.path, AtFlags(AT_SYMLINK_FOLLOW));
|
||||
CO_RETURN_ON_ERROR(resolveResult);
|
||||
auto &entry = resolveResult->dirEntry;
|
||||
if (!entry.has_value()) {
|
||||
if constexpr (std::is_same_v<Req, CreateReq>) {
|
||||
req_.path = PathAt(resolveResult->getParentId(), req_.path.path->filename());
|
||||
}
|
||||
co_return makeError(MetaCode::kNotFound);
|
||||
}
|
||||
assert(!entry->isSymlink());
|
||||
auto inode = co_await entry->snapshotLoadInode(txn);
|
||||
CO_RETURN_ON_ERROR(inode);
|
||||
co_return co_await openExists(txn, std::move(*entry), std::move(*inode), this->config().check_file_hole());
|
||||
}
|
||||
}
|
||||
|
||||
CoTryTask<Rsp> openExists(IReadOnlyTransaction &txn, std::optional<DirEntry> entry, Inode inode, bool checkHole) {
|
||||
XLOGF(DBG, "inode {}", inode);
|
||||
assert(!entry.has_value() || inode.id == entry->id);
|
||||
if (prevCreatedInodeId_ == inode.id) {
|
||||
// this inode is created by us, just return here.
|
||||
if (entry.has_value()) {
|
||||
addCreateEvent(*entry, inode);
|
||||
}
|
||||
co_return Rsp(std::move(inode), false);
|
||||
}
|
||||
|
||||
if (req_.flags.contains(O_EXCL)) {
|
||||
co_return makeError(MetaCode::kExists);
|
||||
}
|
||||
|
||||
switch (inode.getType()) {
|
||||
case InodeType::Directory:
|
||||
co_return co_await openExistsDirectory(txn, inode);
|
||||
case InodeType::File:
|
||||
co_return co_await openExistsFile(txn, entry, inode, checkHole);
|
||||
default:
|
||||
XLOGF(FATAL, "inode {} invalid type {}", inode, (int)inode.getType());
|
||||
}
|
||||
}
|
||||
|
||||
CoTryTask<Rsp> openExistsDirectory(IReadOnlyTransaction &txn, Inode &inode) {
|
||||
XLOGF_IF(FATAL, !inode.isDirectory(), "Inode {} is not directory", inode);
|
||||
if (req_.flags.accessType() != AccessType::READ || req_.flags.contains(O_TRUNC) || std::is_same_v<Req, CreateReq>) {
|
||||
co_return makeError(MetaCode::kIsDirectory);
|
||||
}
|
||||
CO_RETURN_ON_ERROR(inode.acl.checkPermission(req_.user, req_.flags.accessType()));
|
||||
co_return Rsp(std::move(inode), false);
|
||||
}
|
||||
|
||||
CoTryTask<Rsp> openExistsFile(IReadOnlyTransaction &txn,
|
||||
std::optional<DirEntry> &entry,
|
||||
Inode &inode,
|
||||
bool checkHole) {
|
||||
XLOGF_IF(FATAL, !inode.isFile(), "Inode {} is not file", inode);
|
||||
|
||||
bool dirty = false;
|
||||
|
||||
// check permission
|
||||
if (req_.flags.contains(O_DIRECTORY)) {
|
||||
co_return makeError(MetaCode::kNotDirectory);
|
||||
}
|
||||
if (req_.flags.accessType() != AccessType::READ && (inode.acl.iflags & FS_IMMUTABLE_FL)) {
|
||||
co_return makeError(MetaCode::kNoPermission, fmt::format("FS_IMMUTABLE_FL set on inode {}", inode.id));
|
||||
}
|
||||
CO_RETURN_ON_ERROR(inode.acl.checkPermission(req_.user, req_.flags.accessType()));
|
||||
|
||||
// check hole
|
||||
auto rdonly = req_.flags.accessType() == AccessType::READ;
|
||||
if (rdonly && inode.asFile().hasHole() && checkHole) {
|
||||
XLOGF(WARN, "Inode {} contains hole, don't allow O_RDONLY", inode.id);
|
||||
co_return makeError(MetaCode::kFileHasHole);
|
||||
}
|
||||
|
||||
// handle otrunc
|
||||
bool otrunc = req_.flags.contains(O_TRUNC);
|
||||
XLOGF(DBG, "inode {}, otrunc {}", inode, otrunc);
|
||||
if (otrunc && entry.has_value()) {
|
||||
BEGIN_WRITE();
|
||||
auto replaced = co_await replaceExistsFile(rwTxn, *entry, inode);
|
||||
CO_RETURN_ON_ERROR(replaced);
|
||||
if (*replaced) {
|
||||
co_return Rsp(std::move(inode), false);
|
||||
}
|
||||
}
|
||||
|
||||
// clear SUID SGID sticky bits on write by non owner
|
||||
constexpr uint32_t sbits = S_ISUID | S_ISGID | S_ISVTX;
|
||||
static_assert(sbits == 07000);
|
||||
if (!rdonly && req_.user.uid != inode.acl.uid && (inode.acl.perm & sbits)) {
|
||||
inode.acl.perm = Permission(inode.acl.perm & (~sbits));
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
if (req_.session.has_value() && req_.flags.accessType() != AccessType::READ) {
|
||||
BEGIN_WRITE();
|
||||
CO_RETURN_ON_ERROR(co_await createSession(rwTxn, inode, req_.flags));
|
||||
|
||||
if (!req_.dynStripe && inode.asFile().dynStripe && inode.asFile().dynStripe < inode.asFile().layout.stripeSize) {
|
||||
inode.asFile().dynStripe = 0;
|
||||
dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (dirty) {
|
||||
BEGIN_WRITE();
|
||||
CO_RETURN_ON_ERROR(co_await inode.addIntoReadConflict(rwTxn));
|
||||
CO_RETURN_ON_ERROR(co_await inode.store(rwTxn));
|
||||
}
|
||||
|
||||
XLOGF(DBG, "inode {}, otrunc {}", inode, otrunc);
|
||||
|
||||
co_return Rsp(std::move(inode), otrunc);
|
||||
}
|
||||
|
||||
CoTryTask<bool> replaceExistsFile(kv::IReadWriteTransaction &txn, DirEntry &entry, Inode &inode) {
|
||||
XLOGF(DBG, "Try to replace file {} on O_TRUNC", entry);
|
||||
assert(inode.isFile());
|
||||
if (!this->config().otrunc_replace_file() || inode.nlink != 1 ||
|
||||
inode.asFile().length < this->config().otrunc_replace_file_threshold()) {
|
||||
XLOGF(DBG,
|
||||
"Can't replace {}, enable replace {}, nlink {}, size {}",
|
||||
entry,
|
||||
this->config().otrunc_replace_file(),
|
||||
inode.nlink,
|
||||
inode.asFile().length);
|
||||
co_return false;
|
||||
}
|
||||
auto checkResult = co_await FileSession::checkExists(txn, inode.id);
|
||||
CO_RETURN_ON_ERROR(checkResult);
|
||||
if (*checkResult) {
|
||||
XLOGF(DBG, "Can't replace {}, has session", entry);
|
||||
co_return false;
|
||||
}
|
||||
|
||||
XLOGF(DBG, "Replace {} with a new inode", entry);
|
||||
auto old = inode;
|
||||
CO_RETURN_ON_ERROR(co_await this->gcManager().removeEntry(txn, entry, old, GcInfo{req_.user.uid, entry.name}));
|
||||
|
||||
// create new entry and inode
|
||||
auto inodeId = co_await this->allocateInodeId(txn, false);
|
||||
CO_RETURN_ON_ERROR(inodeId);
|
||||
entry = DirEntry::newFile(entry.parent, std::string(entry.name), *inodeId);
|
||||
inode = Inode::newFile(*inodeId, inode.acl, inode.asFile().layout, this->now());
|
||||
if (this->config().dynamic_stripe() && req_.dynStripe) {
|
||||
inode.asFile().dynStripe = std::min(this->config().dynamic_stripe_initial(), inode.asFile().layout.stripeSize);
|
||||
}
|
||||
|
||||
CO_RETURN_ON_ERROR(co_await createInodeAndEntry(txn, entry, inode, old));
|
||||
CO_RETURN_ON_ERROR(co_await createSession(txn, inode, req_.flags));
|
||||
|
||||
co_return true;
|
||||
}
|
||||
|
||||
CoTryTask<Void> createInodeAndEntry(IReadWriteTransaction &txn,
|
||||
DirEntry &entry,
|
||||
Inode &inode,
|
||||
std::optional<Inode> old = std::nullopt) {
|
||||
auto parentId = entry.parent;
|
||||
auto inodeId = entry.id;
|
||||
assert(inode.id == inodeId);
|
||||
|
||||
// NOTE: add parent inode and dirEntry into read conflict set.
|
||||
// add parent inode into read conflict set to prevent parent is removed concurrently
|
||||
CO_RETURN_ON_ERROR(co_await Inode(parentId).addIntoReadConflict(txn));
|
||||
// add directory entry into read conflict set to prevent concurrent create
|
||||
CO_RETURN_ON_ERROR(co_await entry.addIntoReadConflict(txn));
|
||||
|
||||
// create inode and dirEntry
|
||||
CO_RETURN_ON_ERROR(co_await entry.store(txn));
|
||||
CO_RETURN_ON_ERROR(co_await inode.store(txn));
|
||||
prevCreatedInodeId_ = inodeId;
|
||||
|
||||
addCreateEvent(entry, inode, old);
|
||||
co_return Void{};
|
||||
}
|
||||
|
||||
CoTryTask<Void> createSession(IReadWriteTransaction &txn, Inode &inode, OpenFlags oflags) {
|
||||
if (!inode.isFile()) {
|
||||
assert(false);
|
||||
co_return makeError(MetaCode::kNotFile);
|
||||
}
|
||||
if (!req_.session.has_value()) {
|
||||
co_return Void{};
|
||||
}
|
||||
|
||||
openWrite.addSample(1);
|
||||
auto session = FileSession::create(inode.id, req_.session.value());
|
||||
CO_RETURN_ON_ERROR(co_await session.store(txn));
|
||||
|
||||
this->addEvent(Event::Type::OpenWrite)
|
||||
.addField("inode", inode.id)
|
||||
.addField("owner", inode.acl.uid)
|
||||
.addField("user", req_.user.uid)
|
||||
.addField("host", req_.client.hostname)
|
||||
.addField("length", inode.asFile().length)
|
||||
.addField("truncateVer", inode.asFile().truncateVer)
|
||||
.addField("dynStripe", inode.asFile().dynStripe)
|
||||
.addField("otrunc", oflags.contains(O_TRUNC));
|
||||
this->addTrace(MetaEventTrace{
|
||||
.eventType = Event::Type::OpenWrite,
|
||||
.inodeId = inode.id,
|
||||
.ownerId = inode.acl.uid,
|
||||
.userId = req_.user.uid,
|
||||
.client = req_.client,
|
||||
.length = inode.asFile().length,
|
||||
.truncateVer = inode.asFile().truncateVer,
|
||||
.dynStripe = inode.asFile().dynStripe,
|
||||
.oflags = oflags,
|
||||
});
|
||||
|
||||
co_return Void{};
|
||||
}
|
||||
|
||||
void addCreateEvent(const DirEntry &entry, const Inode &inode, std::optional<Inode> old = std::nullopt) {
|
||||
XLOGF_IF(DFATAL, (old.has_value() && !old->isFile()), "old {} is not file", *old);
|
||||
auto &event = this->addEvent(Event::Type::Create)
|
||||
.addField("parent", entry.parent)
|
||||
.addField("name", entry.name)
|
||||
.addField("inode", entry.id)
|
||||
.addField("user", req_.user.uid)
|
||||
.addField("host", req_.client.hostname)
|
||||
.addField("chain_table", inode.asFile().layout.tableId);
|
||||
if (old && old->isFile()) {
|
||||
event.addField("old_inode", old->id).addField("old_length", old->asFile().length);
|
||||
}
|
||||
this->addTrace(MetaEventTrace{
|
||||
.eventType = Event::Type::Create,
|
||||
.inodeId = entry.id,
|
||||
.parentId = entry.parent,
|
||||
.entryName = entry.name,
|
||||
.userId = req_.user.uid,
|
||||
.client = req_.client,
|
||||
.tableId = inode.asFile().layout.tableId,
|
||||
});
|
||||
}
|
||||
|
||||
private:
|
||||
Req &req_;
|
||||
std::optional<InodeId> prevCreatedInodeId_;
|
||||
};
|
||||
|
||||
/**
 * Creates the operation object that serves an open request.
 * OpenOp stores `req` by (non-const) reference, so `req` must stay alive until the returned operation is done.
 */
MetaStore::OpPtr<OpenRsp> MetaStore::open(OpenReq &req) {
|
||||
return std::make_unique<OpenOp<OpenReq, OpenRsp>>(*this, req);
|
||||
}
|
||||
|
||||
/**
 * Creates an OpenOp for a create request, i.e. tries to open the target as an existing inode.
 * The operation may rewrite `req.path` when the target is missing (see OpenOp::run), and it stores
 * `req` by reference, so `req` must stay alive until the returned operation is done.
 */
MetaStore::OpPtr<CreateRsp> MetaStore::tryOpen(CreateReq &req) {
|
||||
return std::make_unique<OpenOp<CreateReq, CreateRsp>>(*this, req);
|
||||
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
74
src/meta/store/ops/PruneSession.cc
Normal file
74
src/meta/store/ops/PruneSession.cc
Normal file
@@ -0,0 +1,74 @@
|
||||
#include <algorithm>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/experimental/coro/Collect.h>
|
||||
#include <folly/experimental/coro/Invoke.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "client/meta/MetaClient.h"
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/StatusCode.h"
|
||||
#include "meta/components/FileHelper.h"
|
||||
#include "meta/components/SessionManager.h"
|
||||
#include "meta/store/FileSession.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/** MetaStore::pruneSession */
|
||||
class PruneSessionOp : public Operation<PruneSessionRsp> {
|
||||
public:
|
||||
PruneSessionOp(MetaStore &meta, const PruneSessionReq &req)
|
||||
: Operation<PruneSessionRsp>(meta),
|
||||
req_(req) {}
|
||||
|
||||
OPERATION_TAGS(req_);
|
||||
|
||||
CoTryTask<PruneSessionRsp> run(IReadWriteTransaction &txn) override {
|
||||
XLOGF(DBG, "PruneSessionOp::run, req {}", req_);
|
||||
|
||||
CHECK_REQUEST(req_);
|
||||
|
||||
static constexpr size_t kConcurrentCheck = 32;
|
||||
std::vector<CoTryTask<void>> tasks;
|
||||
|
||||
auto waitRequests = [&]() -> CoTryTask<void> {
|
||||
auto results = co_await folly::coro::collectAllRange(std::exchange(tasks, {}));
|
||||
for (auto &rsp : results) {
|
||||
CO_RETURN_ON_ERROR(rsp);
|
||||
}
|
||||
co_return Void{};
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < req_.sessions.size(); i++) {
|
||||
auto sessionId = req_.sessions[i];
|
||||
tasks.push_back(prune(txn, sessionId));
|
||||
if (tasks.size() == kConcurrentCheck || i + 1 >= req_.sessions.size()) {
|
||||
CO_RETURN_ON_ERROR(co_await waitRequests());
|
||||
}
|
||||
}
|
||||
|
||||
co_return PruneSessionRsp();
|
||||
}
|
||||
|
||||
private:
|
||||
CoTryTask<void> prune(IReadWriteTransaction &txn, const Uuid sessionId) {
|
||||
auto session = FileSession::createPrune(req_.client, sessionId);
|
||||
CO_RETURN_ON_ERROR(co_await session.store(txn));
|
||||
co_return Void{};
|
||||
}
|
||||
|
||||
const PruneSessionReq &req_;
|
||||
};
|
||||
|
||||
/**
 * Creates the operation object that serves a pruneSession request.
 * PruneSessionOp stores `req` by reference, so `req` must stay alive until the returned operation is done.
 */
MetaStore::OpPtr<PruneSessionRsp> MetaStore::pruneSession(const PruneSessionReq &req) {
|
||||
return std::make_unique<PruneSessionOp>(*this, req);
|
||||
}
|
||||
} // namespace hf3fs::meta::server
|
||||
200
src/meta/store/ops/Remove.cc
Normal file
200
src/meta/store/ops/Remove.cc
Normal file
@@ -0,0 +1,200 @@
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <fcntl.h>
|
||||
#include <fmt/core.h>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/ScopeGuard.h>
|
||||
#include <folly/experimental/coro/Collect.h>
|
||||
#include <folly/experimental/coro/CurrentExecutor.h>
|
||||
#include <folly/experimental/coro/Invoke.h>
|
||||
#include <folly/fibers/BatchSemaphore.h>
|
||||
#include <folly/fibers/Semaphore.h>
|
||||
#include <folly/futures/Future.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <linux/fs.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <queue>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/kv/WithTransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/FaultInjection.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "fdb/FDBRetryStrategy.h"
|
||||
#include "meta/components/GcManager.h"
|
||||
#include "meta/components/SessionManager.h"
|
||||
#include "meta/event/Event.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
#include "meta/store/PathResolve.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/** Remove. */
|
||||
class RemoveOp : public Operation<RemoveRsp> {
|
||||
public:
|
||||
RemoveOp(MetaStore &meta, const RemoveReq &req)
|
||||
: Operation<RemoveRsp>(meta),
|
||||
req_(req) {}
|
||||
|
||||
OPERATION_TAGS(req_);
|
||||
|
||||
bool needIdempotent(Uuid &clientId, Uuid &requestId) const override {
|
||||
if (!req_.checkUuid()) {
|
||||
return false;
|
||||
}
|
||||
if (req_.recursive || config().idempotent_remove()) {
|
||||
clientId = req_.client.uuid;
|
||||
requestId = req_.uuid;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
CoTryTask<RemoveRsp> run(IReadWriteTransaction &txn) override {
|
||||
XLOGF(DBG, "RemoveOp: {}", req_);
|
||||
|
||||
CHECK_REQUEST(req_);
|
||||
|
||||
Result<PathResolveOp::ResolveResult> resolveResult = makeError(MetaCode::kFoundBug);
|
||||
if (req_.path.path.has_value()) {
|
||||
resolveResult = co_await resolve(txn, req_.user)
|
||||
.path(req_.path, AtFlags(AT_SYMLINK_NOFOLLOW) /* remove shouldn't follow symlink */);
|
||||
} else {
|
||||
resolveResult = co_await resolve(txn, req_.user).byDirectoryInodeId(req_.path.parent);
|
||||
}
|
||||
CO_RETURN_ON_ERROR(resolveResult);
|
||||
|
||||
if (!resolveResult->dirEntry.has_value()) {
|
||||
co_return makeError(MetaCode::kNotFound);
|
||||
} else if (resolveResult->dirEntry->id.isTreeRoot()) {
|
||||
// don't permit remove root
|
||||
co_return MAKE_ERROR_F(MetaCode::kNoPermission, "Can't remove tree root {}", *resolveResult->dirEntry);
|
||||
}
|
||||
// check src InodeId
|
||||
if (req_.inodeId && resolveResult->dirEntry->id != req_.inodeId) {
|
||||
co_return MAKE_ERROR_F(MetaCode::kNotFound, "remove {}, inodeId != {}", *resolveResult->dirEntry, *req_.inodeId);
|
||||
}
|
||||
|
||||
// check permission, must have write permission to parent directory, and not locked
|
||||
auto parent = co_await resolveResult->getParentInode(txn);
|
||||
CO_RETURN_ON_ERROR(parent);
|
||||
CO_RETURN_ON_ERROR(parent->acl.checkPermission(req_.user, AccessType::WRITE));
|
||||
CO_RETURN_ON_ERROR(parent->asDirectory().checkLock(req_.client));
|
||||
auto &entry = resolveResult->dirEntry.value();
|
||||
if (req_.checkType) {
|
||||
if (req_.atFlags.contains(AT_REMOVEDIR) && entry.isFile()) {
|
||||
co_return makeError(MetaCode::kNotDirectory);
|
||||
}
|
||||
if (!req_.atFlags.contains(AT_REMOVEDIR) && entry.isDirectory()) {
|
||||
co_return makeError(MetaCode::kIsDirectory);
|
||||
}
|
||||
}
|
||||
|
||||
// The sticky bit (S_ISVTX) on a directory means that a file in that directory can be renamed or deleted
|
||||
// only by the owner of the file, by the owner of the directory, and by a privileged process.
|
||||
auto loadInodeResult = co_await entry.snapshotLoadInode(txn);
|
||||
CO_RETURN_ON_ERROR(loadInodeResult);
|
||||
auto &inode = *loadInodeResult;
|
||||
if ((parent->acl.perm & S_ISVTX) && req_.user.uid != parent->acl.uid && !req_.user.isRoot() &&
|
||||
req_.user.uid != inode.acl.uid) {
|
||||
auto msg = fmt::format("can't remove {}, S_ISVTX set on parent {} {}", entry, parent->id, parent->acl);
|
||||
XLOG(DBG, msg);
|
||||
co_return makeError(MetaCode::kNoPermission, msg);
|
||||
}
|
||||
if (inode.acl.iflags & FS_IMMUTABLE_FL) {
|
||||
auto msg = fmt::format("can't remove {}, FS_IMMUTABLE_FL set on inode", entry);
|
||||
XLOG(DBG, msg);
|
||||
co_return makeError(MetaCode::kNoPermission, msg);
|
||||
}
|
||||
|
||||
auto type = std::string(magic_enum::enum_name(inode.getType()));
|
||||
folly::toLowerAscii(type);
|
||||
|
||||
auto event = Event(Event::Type::Remove);
|
||||
event.addField("parent", entry.parent)
|
||||
.addField("name", entry.name)
|
||||
.addField("inode", entry.id)
|
||||
.addField("type", type)
|
||||
.addField("owner", entry.dirAcl->uid)
|
||||
.addField("nlink", inode.nlink - 1)
|
||||
.addField("user", req_.user.uid)
|
||||
.addField("host", req_.client.hostname);
|
||||
auto trace = MetaEventTrace{.eventType = Event::Type::Remove,
|
||||
.inodeId = entry.id,
|
||||
.parentId = entry.parent,
|
||||
.entryName = entry.name,
|
||||
.ownerId = inode.acl.uid,
|
||||
.userId = req_.user.uid,
|
||||
.client = req_.client,
|
||||
.inodeType = inode.getType(),
|
||||
.nlink = inode.nlink,
|
||||
.recursiveRemove = req_.recursive};
|
||||
|
||||
auto gcInfo = GcInfo{req_.user.uid, entry.name};
|
||||
if (entry.isDirectory()) {
|
||||
event.addField("recursive", req_.recursive);
|
||||
|
||||
auto result = co_await DirEntryList::checkEmpty(txn, entry.id);
|
||||
CO_RETURN_ON_ERROR(result);
|
||||
if (auto empty = *result; empty) {
|
||||
XLOGF_IF(DFATAL, inode.nlink != 1, "Directory {} nlink != 1", inode);
|
||||
// remove directory directly
|
||||
CO_RETURN_ON_ERROR(co_await entry.addIntoReadConflict(txn));
|
||||
CO_RETURN_ON_ERROR(co_await inode.addIntoReadConflict(txn));
|
||||
CO_RETURN_ON_ERROR(co_await entry.remove(txn));
|
||||
CO_RETURN_ON_ERROR(co_await inode.remove(txn));
|
||||
|
||||
addEvent(event);
|
||||
addTrace(std::move(trace));
|
||||
|
||||
co_return RemoveRsp{};
|
||||
}
|
||||
|
||||
if (!req_.recursive) {
|
||||
co_return makeError(MetaCode::kNotEmpty);
|
||||
}
|
||||
|
||||
CO_RETURN_ON_ERROR(inode.acl.checkRecursiveRmPerm(req_.user, config().recursive_remove_check_owner()));
|
||||
auto recursiveCheck = config().recursive_remove_perm_check();
|
||||
if (recursiveCheck) {
|
||||
auto res = co_await DirEntryList::recursiveCheckRmPerm(txn, inode.id, req_.user, recursiveCheck, 128);
|
||||
CO_RETURN_ON_ERROR(res);
|
||||
}
|
||||
|
||||
// recursive remove, save original path
|
||||
auto ancestors = std::vector<Inode>();
|
||||
CO_RETURN_ON_ERROR(co_await Inode::loadAncestors(txn, ancestors, entry.parent));
|
||||
for (auto &ancestor : ancestors) {
|
||||
gcInfo.origPath = ancestor.asDirectory().name / gcInfo.origPath;
|
||||
}
|
||||
event.addField("origPath", gcInfo.origPath.string());
|
||||
trace.origPath = gcInfo.origPath;
|
||||
}
|
||||
|
||||
auto result = co_await gcManager().removeEntry(txn, entry, inode, gcInfo);
|
||||
CO_RETURN_ON_ERROR(result);
|
||||
|
||||
addEvent(event);
|
||||
addTrace(std::move(trace));
|
||||
|
||||
co_return RemoveRsp{};
|
||||
}
|
||||
|
||||
private:
|
||||
const RemoveReq &req_;
|
||||
};
|
||||
|
||||
/** Creates the operation object that executes a RemoveReq against this store. */
MetaStore::OpPtr<RemoveRsp> MetaStore::remove(const RemoveReq &req) {
  return OpPtr<RemoveRsp>(std::make_unique<RemoveOp>(*this, req));
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
369
src/meta/store/ops/Rename.cc
Normal file
369
src/meta/store/ops/Rename.cc
Normal file
@@ -0,0 +1,369 @@
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <fcntl.h>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/experimental/coro/Collect.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <linux/fs.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <sys/stat.h>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/FaultInjection.h"
|
||||
#include "common/utils/Path.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/core/user/User.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "meta/components/GcManager.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
#include "meta/store/PathResolve.h"
|
||||
#include "meta/store/Utils.h"
|
||||
#include "meta/store/ops/SetAttr.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/** MetaStore::rename */
|
||||
/**
|
||||
* Note: rename operation in POSIX and HDFS has different semantic when destination exists.
|
||||
* In POSIX, if destination is a file or empty directory, it will be replaced automatically (special case:
|
||||
* set RENAME_NOREPLACE flags in renameat2).
|
||||
* In HDFS,
|
||||
* - if destination is a file, rename operation will raise FileAlreadyExistsException;
|
||||
* - if destination is a directory and source is file, source will be moved under destination (eg: mv file dir ->
|
||||
* dir/file);
|
||||
* - if both source and destination are directories, all children of source will be moved under destination recursively
|
||||
* (we have decided to not provide this semantic because it's too complicated).
|
||||
*
|
||||
* This function implements POSIX semantic.
|
||||
*/
|
||||
class RenameOp : public Operation<RenameRsp> {
 public:
  RenameOp(MetaStore &meta, const RenameReq &req)
      : Operation<RenameRsp>(meta),
        req_(req) {}

  OPERATION_TAGS(req_);

  /**
   * True when the chain has at least two inodes, its last inode is InodeId::root(), and the
   * inode just before it is a directory named "trash".
   */
  static bool underTrash(const std::vector<Inode> &ancestors) {
    const auto count = ancestors.size();
    if (count < 2) {
      return false;
    }
    const auto &last = ancestors[count - 1];
    const auto &beforeLast = ancestors[count - 2];
    return last.id == InodeId::root() && beforeLast.asDirectory().name == "trash";
  }

  /**
   * Reports whether this request should be tracked for idempotency: the request uuid must pass
   * checkUuid() and either moveToTrash is set or config().idempotent_rename() is enabled.
   * The output parameters are only written when true is returned.
   */
  bool needIdempotent(Uuid &clientId, Uuid &requestId) const override {
    const bool tracked = req_.checkUuid() && (req_.moveToTrash || config().idempotent_rename());
    if (tracked) {
      clientId = req_.client.uuid;
      requestId = req_.uuid;
    }
    return tracked;
  }

  /**
   * Validates moving the source directory below the destination's parent.
   *
   * Walks the ancestors of the destination parent and rejects: the source being one of them,
   * any ancestor with nlink == 0, and a chain that terminates at the GC root (or at any other
   * self-parented inode that is not the root). When the destination lies under trash, the
   * trash-specific permission rules are applied and origPath is set to the source's name
   * prefixed with the names of its ancestors.
   */
  CoTryTask<Void> checkLoop(IReadWriteTransaction &txn,
                            const PathResolveOp::ResolveResult &srcResult,
                            const PathResolveOp::ResolveResult &dstResult,
                            std::optional<Path> &origPath) {
    std::vector<Inode> dstChain;
    CO_RETURN_ON_ERROR(co_await Inode::loadAncestors(txn, dstChain, dstResult.getParentId()));
    assert(!dstChain.empty());

    for (auto &node : dstChain) {
      // src must not be an ancestor of dst
      if (node.id == srcResult.dirEntry->id) {
        co_return makeError(StatusCode::kInvalidArg, "try to move directory into it's descendent");
      }

      // destination lives below a deleted directory
      if (node.nlink == 0) {
        co_return makeError(MetaCode::kNotFound);
      }

      // only self-parented inodes need further inspection
      if (node.id != node.asDirectory().parent) {
        continue;
      }
      if (node.id == InodeId::root()) {
        break;
      }
      if (node.id == InodeId::gcRoot()) {
        XLOGF(ERR, "RenameOp: {} move directory into a removed directory", req_);
        co_return makeError(MetaCode::kNoPermission);
      }
      XLOGF(DFATAL, "Inode {} parent is itself", node);
      co_return makeError(MetaCode::kFoundBug);
    }

    if (!underTrash(dstChain)) {
      co_return Void{};
    }

    XLOGF_IF(FATAL, !srcResult.dirEntry->isDirectory(), "{} not directory", *srcResult.dirEntry);
    std::vector<Inode> srcChain;
    CO_RETURN_ON_ERROR(co_await Inode::loadAncestors(txn, srcChain, srcResult.getParentId()));

    const bool trashMoveAllowed = req_.moveToTrash || config().allow_directly_move_to_trash();
    if (trashMoveAllowed) {
      auto acl = srcResult.dirEntry->dirAcl;
      if (!acl) {
        XLOGF(DFATAL, "DirEntry {} is directory, but don't have acl", *srcResult.dirEntry);
        co_return makeError(MetaCode::kFoundBug);
      }
      // moving a directory into trash is checked like a recursive remove
      CO_RETURN_ON_ERROR(acl->checkRecursiveRmPerm(req_.user, config().recursive_remove_check_owner()));

      auto recursiveCheck = config().recursive_remove_perm_check();
      if (recursiveCheck) {
        auto res =
            co_await DirEntryList::recursiveCheckRmPerm(txn, srcResult.dirEntry->id, req_.user, recursiveCheck, 128);
        CO_RETURN_ON_ERROR(res);
      }
    } else if (req_.user.uid != flat::Uid(0) && !underTrash(srcChain)) {
      // without moveToTrash, uid != 0 may only shuffle entries that are already in trash
      co_return makeError(MetaCode::kNoPermission, "try to move into trash directory without moveToTrash");
    }

    origPath = Path(srcResult.dirEntry->name);
    for (auto &node : srcChain) {
      origPath = node.asDirectory().name / *origPath;
    }

    co_return Void{};
  }

  /** Fills `inode` from a snapshot load of `entry`'s inode unless it already holds a value. */
  CoTryTask<Void> snapshotLoadInode(IReadWriteTransaction &txn, const DirEntry &entry, std::optional<Inode> &inode) {
    if (inode.has_value()) {
      co_return Void{};
    }
    auto loaded = co_await entry.snapshotLoadInode(txn);
    CO_RETURN_ON_ERROR(loaded);
    inode = std::move(*loaded);
    co_return Void{};
  }

  /**
   * Permission checks for one side of the rename.
   *
   * Requires write permission on, and no conflicting lock of, the parent directory. For the
   * destination side (`dst == true`) the parent must also still be linked. When the entry
   * exists, its inode is loaded into `inode` and must not be immutable; under a sticky parent
   * the caller must be root, the parent's owner, or the inode's owner.
   */
  CoTryTask<Void> checkPermission(IReadWriteTransaction &txn,
                                  PathResolveOp::ResolveResult &resolve,
                                  std::optional<Inode> &inode,
                                  bool dst) {
    auto parentInode = co_await resolve.getParentInode(txn);
    CO_RETURN_ON_ERROR(parentInode);
    CO_RETURN_ON_ERROR(parentInode->acl.checkPermission(req_.user, AccessType::WRITE));
    CO_RETURN_ON_ERROR(parentInode->asDirectory().checkLock(req_.client));
    if (dst && !parentInode->nlink) {
      // can't rename into a removed directory
      co_return makeError(MetaCode::kNotFound);
    }
    if (!resolve.dirEntry.has_value()) {
      co_return Void{};
    }

    auto &entry = *resolve.dirEntry;
    CO_RETURN_ON_ERROR(co_await snapshotLoadInode(txn, entry, inode));
    if (inode->acl.iflags & FS_IMMUTABLE_FL) {
      auto msg = fmt::format("rename can't move {}, FS_IMMUTABLE_FL set on inode", entry);
      XLOG(DBG, msg);
      co_return makeError(MetaCode::kNoPermission, msg);
    }

    // Sticky bit (S_ISVTX) on the parent: only the entry's owner, the parent's owner, or root may
    // rename or delete the entry.
    const bool stickyApplies =
        (parentInode->acl.perm & S_ISVTX) && req_.user.uid != parentInode->acl.uid && !req_.user.isRoot();
    if (stickyApplies && req_.user.uid != inode->acl.uid) {
      auto msg = fmt::format("rename can't move {} {}, S_ISVTX set on parent {} {}",
                             entry,
                             inode->acl,
                             resolve.getParentId(),
                             parentInode->acl);
      XLOG(DBG, msg);
      co_return makeError(MetaCode::kNoPermission, msg);
    }

    co_return Void{};
  }

  /**
   * Disposes of the inode behind an existing destination entry so it can be replaced.
   *
   * Returns nullopt when there is no destination entry, otherwise the replaced inode id paired
   * with the link count reported for it (0 for a directory, whose inode is removed outright).
   */
  CoTryTask<std::optional<std::pair<InodeId, uint16_t>>> removeDst(IReadWriteTransaction &txn,
                                                                   PathResolveOp::ResolveResult &dst,
                                                                   std::optional<Inode> &dstInode) {
    using Replaced = std::pair<InodeId, uint16_t>;

    if (!dst.dirEntry.has_value()) {
      co_return std::nullopt;
    }

    assert(dst.dirEntry->name == req_.dest.path->filename().native());
    auto &victim = *dst.dirEntry;

    if (victim.isFile()) {
      // let GC task free file chunks.
      CO_RETURN_ON_ERROR(co_await snapshotLoadInode(txn, victim, dstInode));
      CO_RETURN_ON_ERROR(
          co_await gcManager().removeEntry(txn, victim, *dstInode, GcInfo{req_.user.uid, victim.name}));
      assert(dstInode->id == victim.id);
      co_return Replaced{dstInode->id, dstInode->nlink};
    }

    if (victim.isDirectory()) {
      // empty directory, can remove Inode directly
      CO_RETURN_ON_ERROR(co_await Inode(victim.id).remove(txn));
      co_return Replaced{victim.id, 0};
    }

    XLOGF_IF(DFATAL, !victim.isSymlink(), "{} not symlink, shouldn't happen", victim);
    // need load inode and check refcnt
    auto linkInode = co_await victim.loadInode(txn);
    CO_RETURN_ON_ERROR(linkInode);
    if (UNLIKELY(linkInode->nlink == 0)) {
      auto msg = fmt::format("entry {} exists, but inode {} nlink == 0", victim, linkInode);
      XLOG(DFATAL, msg);
      co_return makeError(MetaCode::kFoundBug, msg);
    }
    // NOTE: The fuse client may have cached this symlink. If it were deleted immediately, kNotFound would be
    // reported for subsequent visits. As a temporary solution the symlink inode is never removed here, even when
    // its link count drops to zero; it is only stored back with the decremented count. This needs to be resolved
    // later.
    SetAttr::update(linkInode->ctime, UtcClock::now(), config().time_granularity(), true);
    auto remainingLinks = --linkInode->nlink;
    CO_RETURN_ON_ERROR(co_await linkInode->store(txn));
    co_return Replaced{victim.id, remainingLinks};
  }

  /**
   * Executes the rename inside txn: resolves both paths, short-circuits when the rename was
   * already applied or is a no-op, validates the move, then removes the source entry, disposes
   * of any replaced destination, and stores the new destination entry tagged with the request uuid.
   */
  CoTryTask<RenameRsp> run(IReadWriteTransaction &txn) override {
    XLOGF(DBG, "RenameOp: {}", req_);

    CHECK_REQUEST(req_);

    // Both lookups go through collectAll; neither follows a trailing symlink.
    auto [srcLookup, dstLookup] =
        co_await folly::coro::collectAll(resolve(txn, req_.user).path(req_.src, AtFlags(AT_SYMLINK_NOFOLLOW)),
                                         resolve(txn, req_.user).path(req_.dest, AtFlags(AT_SYMLINK_NOFOLLOW)));
    CO_RETURN_ON_ERROR(srcLookup);
    CO_RETURN_ON_ERROR(dstLookup);

    const bool dstExists = dstLookup->dirEntry.has_value();

    // check dst, transaction may already executed.
    if (dstExists && dstLookup->dirEntry->uuid != Uuid::zero() && dstLookup->dirEntry->uuid == req_.uuid) {
      // this may happens when FDB returns commit_unknown_result, or we failed to send response to client
      XLOGF(CRITICAL, "Rename already finished, dst {}, req {}, uuid {}", *dstLookup->dirEntry, req_, req_.uuid);
      auto finished = co_await dstLookup->dirEntry->snapshotLoadInode(txn);
      CO_RETURN_ON_ERROR(finished);
      co_return RenameRsp(std::move(*finished));
    }

    // src should exists
    if (!srcLookup->dirEntry.has_value()) {
      co_return MAKE_ERROR_F(MetaCode::kNotFound, "rename src {} not found", req_.src);
    }
    // check src InodeId
    if (req_.inodeId && srcLookup->dirEntry->id != req_.inodeId) {
      co_return MAKE_ERROR_F(MetaCode::kNotFound, "rename src {}, inodeId != {}", *srcLookup->dirEntry, *req_.inodeId);
    }
    // src and dst name the very same dir entry: nothing to do
    if (dstExists && dstLookup->dirEntry->parent == srcLookup->dirEntry->parent &&
        dstLookup->dirEntry->name == srcLookup->dirEntry->name) {
      auto unchanged = co_await dstLookup->dirEntry->snapshotLoadInode(txn);
      CO_RETURN_ON_ERROR(unchanged);
      co_return RenameRsp(std::move(*unchanged));
    }
    // move to trash shouldn't replace file already exists
    if (dstExists && dstLookup->dirEntry->isFile() && req_.moveToTrash) {
      co_return MAKE_ERROR_F(MetaCode::kExists, "rename dest {} exist", req_.dest);
    }
    // dst shouldn't be a non-empty directory
    if (dstExists && dstLookup->dirEntry->isDirectory()) {
      auto emptiness = co_await DirEntryList::checkEmpty(txn, dstLookup->dirEntry->id);
      CO_RETURN_ON_ERROR(emptiness);
      if (!emptiness.value()) {
        co_return MAKE_ERROR_F(MetaCode::kNotEmpty, "rename dest {} not empty", req_.dest);
      }
    }

    // now, dst can be safely replaced (not exist, empty directory, file, symlink).
    std::optional<Path> origPath;
    if (srcLookup->dirEntry->isDirectory()) {
      if (dstExists && !dstLookup->dirEntry->isDirectory()) {
        // man 2 rename: oldpath can specify a directory. In this case, newpath must either not exist, or it must
        // specify an empty directory.
        co_return makeError(MetaCode::kNotDirectory);
      }
      CO_RETURN_ON_ERROR(co_await checkLoop(txn, *srcLookup, *dstLookup, origPath));
    }

    // permission check
    std::optional<Inode> srcInode;
    std::optional<Inode> dstInode;
    CO_RETURN_ON_ERROR(co_await checkPermission(txn, *srcLookup, srcInode, false));
    CO_RETURN_ON_ERROR(co_await checkPermission(txn, *dstLookup, dstInode, true));

    // NOTE: add src/dst's parent inode and dirEntry into read conflict set.
    CO_RETURN_ON_ERROR(co_await Inode(srcLookup->getParentId()).addIntoReadConflict(txn));
    CO_RETURN_ON_ERROR(co_await srcLookup->dirEntry->addIntoReadConflict(txn));
    CO_RETURN_ON_ERROR(co_await Inode(dstLookup->getParentId()).addIntoReadConflict(txn));
    CO_RETURN_ON_ERROR(
        co_await DirEntry(dstLookup->getParentId(), req_.dest.path->filename().native()).addIntoReadConflict(txn));

    auto &srcEntry = srcLookup->dirEntry.value();
    auto movedInode = co_await srcEntry.loadInode(txn);
    CO_RETURN_ON_ERROR(movedInode);
    auto &inode = movedInode.value();
    if (srcEntry.isDirectory()) {
      // Read-modify-write of the directory inode (hence loadInode rather than a snapshot load):
      // point it at its new parent and name.
      inode.asDirectory().parent = dstLookup->getParentId();
      inode.asDirectory().name = req_.dest.path->filename().native();
      auto stored = co_await inode.store(txn);
      CO_RETURN_ON_ERROR(stored);
    }

    // remove src entry and dst entry
    CO_RETURN_ON_ERROR(co_await srcEntry.remove(txn));
    auto replacedResult = co_await removeDst(txn, *dstLookup, dstInode);
    CO_RETURN_ON_ERROR(replacedResult);
    auto &replaced = *replacedResult;

    // create dst entry
    DirEntry newDstEntry(dstLookup->getParentId(), req_.dest.path->filename().native());
    newDstEntry.data() = srcEntry.data();
    newDstEntry.uuid = req_.uuid;
    CO_RETURN_ON_ERROR(co_await newDstEntry.store(txn));

    auto &event = addEvent(Event::Type::Rename)
                      .addField("srcParent", srcEntry.parent)
                      .addField("srcName", srcEntry.name)
                      .addField("dstParent", newDstEntry.parent)
                      .addField("dstName", newDstEntry.name)
                      .addField("inode", newDstEntry.id)
                      .addField("user", req_.user.uid)
                      .addField("host", req_.client.hostname);
    addTrace(MetaEventTrace{.eventType = Event::Type::Rename,
                            .inodeId = newDstEntry.id,
                            .parentId = srcEntry.parent,
                            .entryName = srcEntry.name,
                            .dstParentId = newDstEntry.parent,
                            .dstEntryName = newDstEntry.name,
                            .userId = req_.user.uid,
                            .client = req_.client,
                            .origPath = origPath.value_or(Path())});

    if (replaced.has_value()) {
      auto [replacedId, replacedNlink] = *replaced;
      event.addField("oldDstInode", replacedId).addField("oldDstNlink", replacedNlink);
    }

    if (origPath.has_value()) {
      event.addField("origPath", origPath->string());
    }

    co_return RenameRsp(std::move(inode));
  }

 private:
  const RenameReq &req_;
};
|
||||
|
||||
/** Creates the operation object that executes a RenameReq against this store. */
MetaStore::OpPtr<RenameRsp> MetaStore::rename(const RenameReq &req) {
  return OpPtr<RenameRsp>(std::make_unique<RenameOp>(*this, req));
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
99
src/meta/store/ops/SetAttr.cc
Normal file
99
src/meta/store/ops/SetAttr.cc
Normal file
@@ -0,0 +1,99 @@
|
||||
#include "SetAttr.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <folly/ScopeGuard.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <optional>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "meta/store/DirEntry.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
#include "meta/store/PathResolve.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/**
 * Operation that applies a SetAttrReq to a single inode.
 *
 * The inode is located either through a path (keeping the resolved directory entry) or directly
 * through the request's inode reference. Validation and mutation are delegated to
 * SetAttr::check / SetAttr::apply.
 */
class SetAttrOp : public Operation<SetAttrRsp> {
 public:
  SetAttrOp(MetaStore &meta, const SetAttrReq &req)
      : Operation<SetAttrRsp>(meta),
        req_(req) {}

  OPERATION_TAGS(req_);

  /**
   * Loads the target inode, checks and applies the requested changes, and persists them.
   *
   * When a directory other than InodeId::root() ends up with a different ACL, the ACL copy kept
   * in its directory entry is rewritten as well.
   */
  CoTryTask<SetAttrRsp> run(IReadWriteTransaction &txn) override {
    XLOGF(DBG, "SetAttrOp: {}", req_);

    Inode inode;
    std::optional<DirEntry> entry;
    if (req_.path.path.has_value()) {
      auto resolved = co_await resolve(txn, req_.user)
                          .dirEntry(req_.path, req_.flags | AT_SYMLINK_FOLLOW /* follow symlink by default */);
      CO_RETURN_ON_ERROR(resolved);
      entry = std::move(*resolved);
      auto loaded = co_await entry->snapshotLoadInode(txn);
      CO_RETURN_ON_ERROR(loaded);
      inode = std::move(*loaded);
    } else {
      auto loaded =
          co_await resolve(txn, req_.user).inode(req_.path, req_.flags | AT_SYMLINK_FOLLOW, true /* checkRefCnt */);
      CO_RETURN_ON_ERROR(loaded);
      inode = std::move(*loaded);
    }

    // Remember the ACL as loaded so a change made by apply() can be detected afterwards.
    auto aclBefore = inode.acl;

    CO_RETURN_ON_ERROR(SetAttr::check(inode, req_, config()));
    bool dirty = SetAttr::apply(inode, req_, config().time_granularity(), config().dynamic_stripe_growth());

    const bool syncEntryAcl = inode.isDirectory() && inode.acl != aclBefore && inode.id != InodeId::root();
    if (syncEntryAcl) {
      XLOGF_IF(FATAL, !dirty, "acl changed but dirty not set, {} != {}", inode.acl, aclBefore);

      // No entry yet, or one reached via "." / "..": load the directory's entry from the inode itself.
      const bool needEntryLookup = !entry.has_value() || entry->name == "." || entry->name == "..";
      if (needEntryLookup) {
        auto found = co_await inode.snapshotLoadDirEntry(txn);
        CO_RETURN_ON_ERROR(found);
        entry = std::move(*found);
        if (inode.asDirectory().name.empty()) {
          // Directory inode carries no name yet: take it from the entry and make sure it gets stored.
          inode.asDirectory().name = entry->name;
          dirty = true;
        }
      }
      XLOGF_IF(DFATAL, entry->name != inode.asDirectory().name, "{} != {}", entry->name, inode.asDirectory().name);
      entry->dirAcl = inode.acl;
      CO_RETURN_ON_ERROR(co_await entry->addIntoReadConflict(txn));
      CO_RETURN_ON_ERROR(co_await entry->store(txn));
    }

    if (dirty) {
      // NOTE: add inode into read conflict set
      CO_RETURN_ON_ERROR(co_await inode.addIntoReadConflict(txn));
      CO_RETURN_ON_ERROR(co_await inode.store(txn));
    }

    co_return SetAttrRsp(std::move(inode));
  }

  /**
   * Runs the base-class finish handling, then - on success - invalidates the ACL cache entry of
   * the inode if the request carried any of uid, gid, perm or iflags.
   */
  void finish(const Result<SetAttrRsp> &result) override {
    Operation<SetAttrRsp>::finish(result);
    if (result.hasError()) {
      return;
    }
    const bool aclFieldsRequested = req_.uid || req_.gid || req_.perm || req_.iflags;
    if (aclFieldsRequested) {
      aclCache().invalid(result->stat.id);
    }
  }

 private:
  const SetAttrReq &req_;
};
|
||||
|
||||
/** Creates the operation object that executes a SetAttrReq against this store. */
MetaStore::OpPtr<SetAttrRsp> MetaStore::setAttr(const SetAttrReq &req_) {
  return OpPtr<SetAttrRsp>(std::make_unique<SetAttrOp>(*this, req_));
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
182
src/meta/store/ops/SetAttr.h
Normal file
182
src/meta/store/ops/SetAttr.h
Normal file
@@ -0,0 +1,182 @@
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <compare>
|
||||
#include <folly/ScopeGuard.h>
|
||||
#include <folly/lang/Ordering.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <linux/fs.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
#include "fbs/meta/Schema.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "meta/base/Config.h"
|
||||
#include "meta/service/MetaOperator.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/Utils.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
class SetAttr {
|
||||
public:
|
||||
/**
 * Validates that `req` may be applied to `inode` by `req.user`; nothing is modified.
 *
 * Returns Void on success. Errors: whatever req.valid() reports, kNoPermission for the
 * permission rules below, kNotDirectory for a layout change on a non-directory, and kNotFile
 * for a dynStripe change on a non-file.
 */
static Result<Void> check(const Inode &inode, const SetAttrReq &req, const Config &config) {
  RETURN_ON_ERROR(req.valid());

  const bool isRoot = req.user.isRoot();
  const bool isOwner = req.user.uid == inode.acl.uid;

  // permission check for setPermission
  if (inode.id.isTreeRoot() && (req.perm || req.uid || req.gid)) {
    XLOGF(WARN, "Don't allow change permission of tree root {}!", inode.id);
    return makeError(MetaCode::kNoPermission, fmt::format("Don't allow change permission of {}", inode.id));
  }

  if (req.iflags.has_value() && *req.iflags != inode.acl.iflags) {
    // Newly turning on a feature flag requires the matching config switch.
    const bool enablesChainAllocation =
        !(inode.acl.iflags & FS_CHAIN_ALLOCATION_FL) && (*req.iflags & FS_CHAIN_ALLOCATION_FL);
    if (enablesChainAllocation && !config.iflags_chain_allocation()) {
      return makeError(MetaCode::kNoPermission, "FS_CHAIN_ALLOCATION_FL disabled");
    }
    const bool enablesNewChunkEngine = !(inode.acl.iflags & FS_NEW_CHUNK_ENGINE) && (*req.iflags & FS_NEW_CHUNK_ENGINE);
    if (enablesNewChunkEngine && !config.iflags_chunk_engine()) {
      return makeError(MetaCode::kNoPermission, "FS_NEW_CHUNK_ENGINE disabled");
    }

    // Bits that differ between the requested and the current flags.
    auto changed = *req.iflags ^ inode.acl.iflags;
    // Bits the file owner (as opposed to root) is allowed to flip.
    uint32_t ownerChangeable = (uint32_t)(FS_HUGE_FILE_FL);
    if (config.allow_owner_change_immutable()) {
      ownerChangeable = (uint32_t)(FS_HUGE_FILE_FL | FS_IMMUTABLE_FL);
    }
    const bool ownerMayChange = isOwner && changed == (changed & ownerChangeable);
    if (!isRoot && !ownerMayChange) {
      // NOTE: only allow root user set inode flags, file owner can use chattr +/- i, or set FS_HUGE_FILE_FL
      return makeError(MetaCode::kNoPermission, "only root can set iflags");
    }
  }

  // man 2 chmod: The effective UID of the calling process must match the owner of the file, or the process must be
  // privileged (Linux: it must have the CAP_FOWNER capability).
  const bool permChange = req.perm.has_value() && *req.perm != inode.acl.perm;
  if (permChange && !isRoot && !isOwner) {
    return makeError(MetaCode::kNoPermission, "no perm to set perm");
  }

  // Only a privileged process (Linux: one with the CAP_CHOWN capability) may change the owner of a file.
  const bool uidChange = req.uid.has_value() && *req.uid != inode.acl.uid;
  if (uidChange && !isRoot) {
    return makeError(MetaCode::kNoPermission, "no perm to set uid");
  }

  // The owner of a file may change the group of the file to any group of which that owner is a member. A
  // privileged process (Linux: with CAP_CHOWN) may change the group arbitrarily.
  const bool gidChange = req.gid.has_value() && *req.gid != inode.acl.gid;
  if (gidChange && !isRoot && (!isOwner || !req.user.inGroup(req.gid.value()))) {
    return makeError(MetaCode::kNoPermission, "no perm to set gid");
  }

  // permission check for utimes
  // Setting timestamps to "now" (SETATTR_TIME_NOW plays the role of UTIME_NOW) is allowed when the caller
  //   1. has write access to the file, or
  //   2. owns the file, or
  //   3. is privileged.
  // Any other timestamp value requires condition 2 or 3.
  const bool hasWriteAccess = inode.acl.checkPermission(req.user, AccessType::WRITE).hasValue();
  if (req.atime || req.mtime) {
    const bool explicitTimestamp =
        (req.atime && req.atime != SETATTR_TIME_NOW) || (req.mtime && req.mtime != SETATTR_TIME_NOW);
    const bool allowed = explicitTimestamp ? (isOwner || isRoot) : (hasWriteAccess || isOwner || isRoot);
    if (!allowed) {
      return makeError(MetaCode::kNoPermission);
    }
  }

  // permission check for setLayout
  if (req.layout) {
    if (!inode.isDirectory()) {
      return makeError(MetaCode::kNotDirectory, "setLayout but not directory");
    }
    RETURN_ON_ERROR(inode.acl.checkPermission(req.user, AccessType::WRITE));
  }

  if (!inode.isFile() && req.dynStripe) {
    return makeError(MetaCode::kNotFile, "extend dynStripe but not file");
  }

  return Void{};
}
|
||||
|
||||
/**
 * Applies the changes described by `req` to `inode` in memory.
 *
 * @param inode        inode to mutate.
 * @param req          setattr request; unset optional fields leave the inode untouched.
 * @param resolution   granularity that every stored timestamp is rounded to.
 * @param stripeGrowth multiplier used when extending dynStripe (values below 2 are raised to 2).
 * @return true if any field of `inode` other than ctime was changed.
 */
static bool apply(Inode &inode, const SetAttrReq &req, Duration resolution, uint32_t stripeGrowth) {
  bool dirty = false;

  // setPermission: inode flags, owner, group and permission bits.
  dirty |= update(inode.acl.iflags, req.iflags);
  dirty |= update(inode.acl.uid, req.uid);
  dirty |= update(inode.acl.gid, req.gid);
  dirty |= update(inode.acl.perm, req.perm);

  // setLayout
  if (req.layout.has_value()) {
    dirty |= update(inode.asDirectory().layout, req.layout);
  }

  // ctime is bumped only when one of the attributes above changed; cmp=true keeps it from moving backwards.
  if (dirty) {
    update(inode.ctime, SETATTR_TIME_NOW, resolution, true /* cmp */);
  }

  // utimes: explicit timestamps are stored as given (cmp=false), so they may move backwards.
  dirty |= update(inode.atime, req.atime, resolution, false /* cmp */);
  dirty |= update(inode.mtime, req.mtime, resolution, false /* cmp */);

  // extend dynStripe
  if (req.dynStripe) {
    // Verify the inode type before the first asFile() access rather than after it.
    XLOGF_IF(FATAL, !inode.isFile(), "inode {} is not file", inode);

    auto &file = inode.asFile();
    if (file.dynStripe && file.dynStripe < req.dynStripe) {
      const auto growth = std::max(2u, stripeGrowth);
      const auto limit = std::min(req.dynStripe, file.layout.stripeSize);
      auto dynStripe = file.dynStripe;
      while (dynStripe < limit) {
        // Multiply in 64 bits: a wrapped narrow product could drop to 0 and never reach `limit`.
        const auto next = std::min<uint64_t>(static_cast<uint64_t>(dynStripe) * growth, file.layout.stripeSize);
        dynStripe = static_cast<decltype(dynStripe)>(next);  // next <= stripeSize, so the cast is lossless
      }
      dirty |= update(file.dynStripe, dynStripe);
    }
  }

  return dirty;
}
|
||||
|
||||
/**
 * Timestamp flavour of update().
 *
 * @param v          timestamp to overwrite.
 * @param nv         requested value; nullopt means "leave unchanged", SETATTR_TIME_NOW means "use the current time".
 * @param resolution granularity the new value is rounded to before it is compared and stored.
 * @param cmp        when true, `v` is only replaced by a strictly newer value.
 * @return true if `v` was modified.
 */
static bool update(UtcTime &v, std::optional<UtcTime> nv, Duration resolution, bool cmp) {
  if (!nv) {
    return false;
  }

  UtcTime target = *nv;
  if (target == SETATTR_TIME_NOW) {
    target = UtcClock::now();
  }
  target = target.castGranularity(resolution);

  const bool differs = target != v;
  const bool accepted = !cmp || (target > v);
  if (!(differs && accepted)) {
    return false;
  }

  v = target;
  return true;
}
|
||||
|
||||
template <typename T>
|
||||
static bool update(T &v, std::optional<T> nv) {
|
||||
static_assert(!std::is_same_v<T, UtcTime>);
|
||||
if (nv.has_value() && nv != v) {
|
||||
v = *nv;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Overwrites `v` with `nv` when the two differ.
 *
 * @return true if `v` was modified.
 */
template <typename T>
static bool update(T &v, T nv) {
  if (nv != v) {
    // `nv` is a by-value copy owned by this call, so it can be moved into place.
    v = std::move(nv);
    return true;
  }
  return false;
}
|
||||
};
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
154
src/meta/store/ops/Stat.cc
Normal file
154
src/meta/store/ops/Stat.cc
Normal file
@@ -0,0 +1,154 @@
|
||||
#include <algorithm>
|
||||
#include <fcntl.h>
|
||||
#include <folly/Likely.h>
|
||||
#include <folly/Unit.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/StatusCode.h"
|
||||
#include "fbs/meta/Common.h"
|
||||
#include "fbs/meta/Service.h"
|
||||
#include "fbs/meta/Utils.h"
|
||||
#include "meta/store/BatchContext.h"
|
||||
#include "meta/store/Inode.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
namespace {
|
||||
monitor::CountRecorder statFile("meta_server.stat_file");
|
||||
monitor::CountRecorder statDir("meta_server.stat_dir");
|
||||
monitor::CountRecorder statSymlink("meta_server.stat_symlink");
|
||||
} // namespace
|
||||
|
||||
/** MetaStore::stat */
|
||||
class StatOp : public ReadOnlyOperation<StatRsp> {
|
||||
public:
|
||||
StatOp(MetaStore &meta, const StatReq &req)
|
||||
: ReadOnlyOperation<StatRsp>(meta),
|
||||
req_(req) {}
|
||||
|
||||
OPERATION_TAGS(req_);
|
||||
|
||||
CoTryTask<StatRsp> run(IReadOnlyTransaction &txn) override {
|
||||
XLOGF(DBG, "StatOp::run, req {}", req_);
|
||||
|
||||
CHECK_REQUEST(req_);
|
||||
|
||||
auto stat = co_await resolve(txn, req_.user)
|
||||
.inode(req_.path, req_.flags, !config().allow_stat_deleted_inodes() /* checkRefCnt */);
|
||||
CO_RETURN_ON_ERROR(stat);
|
||||
|
||||
switch (stat->getType()) {
|
||||
case InodeType::File:
|
||||
statFile.addSample(1);
|
||||
break;
|
||||
case InodeType::Directory:
|
||||
statDir.addSample(1);
|
||||
break;
|
||||
case InodeType::Symlink:
|
||||
statSymlink.addSample(1);
|
||||
break;
|
||||
}
|
||||
|
||||
co_return StatRsp(std::move(*stat));
|
||||
}
|
||||
|
||||
private:
|
||||
const StatReq &req_;
|
||||
};
|
||||
|
||||
template <typename Req, typename Rsp>
|
||||
class BatchStatOp : public ReadOnlyOperation<Rsp> {
|
||||
public:
|
||||
BatchStatOp(MetaStore &meta, const Req &req)
|
||||
: ReadOnlyOperation<Rsp>(meta),
|
||||
req_(req) {}
|
||||
|
||||
OPERATION_TAGS(req_);
|
||||
|
||||
auto &vector() {
|
||||
if constexpr (std::is_same_v<Req, BatchStatReq>) {
|
||||
return req_.inodeIds;
|
||||
} else {
|
||||
return req_.paths;
|
||||
}
|
||||
}
|
||||
|
||||
auto createBatchContext() {
|
||||
if constexpr (std::is_same_v<Req, BatchStatReq>) {
|
||||
return folly::Unit{};
|
||||
} else {
|
||||
return BatchContext::create();
|
||||
}
|
||||
}
|
||||
|
||||
CoTryTask<Inode> resolve(IReadOnlyTransaction &txn, const PathAt &path) {
|
||||
co_return co_await ReadOnlyOperation<Rsp>::resolve(txn, req_.user)
|
||||
.inode(path, req_.flags, !this->config().allow_stat_deleted_inodes() /* checkRefCnt */);
|
||||
}
|
||||
|
||||
CoTryTask<Rsp> run(IReadOnlyTransaction &txn) override {
|
||||
XLOGF(DBG, "BatchStatOp::run, req {}", req_);
|
||||
CHECK_REQUEST(req_);
|
||||
|
||||
static constexpr auto byPath = !std::is_same_v<Req, BatchStatReq>;
|
||||
using ResultType = std::conditional_t<byPath, Result<meta::Inode>, std::optional<meta::Inode>>;
|
||||
using TaskResultType = std::conditional_t<byPath, Result<Inode>, Result<std::optional<Inode>>>;
|
||||
|
||||
size_t concurrent =
|
||||
std::max(1u, !byPath ? this->config().batch_stat_concurrent() : this->config().batch_stat_by_path_concurrent());
|
||||
auto exec = co_await folly::coro::co_current_executor;
|
||||
[[maybe_unused]] auto guard = createBatchContext();
|
||||
|
||||
std::vector<ResultType> inodes;
|
||||
auto iter = vector().begin();
|
||||
while (iter != vector().end()) {
|
||||
std::vector<folly::SemiFuture<TaskResultType>> tasks;
|
||||
while (iter != vector().end() && tasks.size() < concurrent) {
|
||||
if constexpr (!byPath) {
|
||||
tasks.push_back(Inode::snapshotLoad(txn, *iter).scheduleOn(exec).start());
|
||||
} else {
|
||||
static_assert(std::is_same_v<Req, BatchStatByPathReq>);
|
||||
tasks.push_back(resolve(txn, *iter).scheduleOn(exec).start());
|
||||
}
|
||||
iter++;
|
||||
}
|
||||
auto results = co_await folly::coro::collectAllRange(std::move(tasks));
|
||||
for (auto result : results) {
|
||||
if (result.hasError() && (!byPath || !ErrorHandling::success(result))) {
|
||||
XLOGF(INFO, "batch stat error {}", result.error());
|
||||
CO_RETURN_ERROR(result);
|
||||
}
|
||||
if constexpr (byPath) {
|
||||
inodes.push_back(result);
|
||||
} else {
|
||||
inodes.push_back(*result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
co_return Rsp(std::move(inodes));
|
||||
}
|
||||
|
||||
private:
|
||||
const Req &req_;
|
||||
};
|
||||
|
||||
/** Builds the operation object that serves a stat request; `req` is held by reference by the operation. */
MetaStore::OpPtr<StatRsp> MetaStore::stat(const StatReq &req) {
  OpPtr<StatRsp> op = std::make_unique<StatOp>(*this, req);
  return op;
}
|
||||
|
||||
/** Builds the operation object that serves a batch stat by inode id; `req` is held by reference by the operation. */
MetaStore::OpPtr<BatchStatRsp> MetaStore::batchStat(const BatchStatReq &req) {
  using Op = BatchStatOp<BatchStatReq, BatchStatRsp>;
  OpPtr<BatchStatRsp> op = std::make_unique<Op>(*this, req);
  return op;
}
|
||||
|
||||
/** Builds the operation object that serves a batch stat by path; `req` is held by reference by the operation. */
MetaStore::OpPtr<BatchStatByPathRsp> MetaStore::batchStatByPath(const BatchStatByPathReq &req) {
  using Op = BatchStatOp<BatchStatByPathReq, BatchStatByPathRsp>;
  OpPtr<BatchStatByPathRsp> op = std::make_unique<Op>(*this, req);
  return op;
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
32
src/meta/store/ops/StatFs.cc
Normal file
32
src/meta/store/ops/StatFs.cc
Normal file
@@ -0,0 +1,32 @@
|
||||
#include <chrono>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/** MetaStore::statFs */
|
||||
class StatFsOp : public ReadOnlyOperation<StatFsRsp> {
|
||||
public:
|
||||
StatFsOp(MetaStore &meta, const StatFsReq &req)
|
||||
: ReadOnlyOperation<StatFsRsp>(meta),
|
||||
req_(req) {}
|
||||
|
||||
OPERATION_TAGS(req_);
|
||||
|
||||
CoTryTask<StatFsRsp> run(IReadOnlyTransaction &) override {
|
||||
XLOGF(DBG, "StatFsOp::run {}", req_);
|
||||
co_return co_await fileHelper().statFs(req_.user, std::chrono::seconds(30));
|
||||
}
|
||||
|
||||
private:
|
||||
const StatFsReq &req_;
|
||||
};
|
||||
|
||||
/** Builds the operation object that serves a statFs request; `req` is held by reference by the operation. */
MetaStore::OpPtr<StatFsRsp> MetaStore::statFs(const StatFsReq &req) {
  OpPtr<StatFsRsp> op = std::make_unique<StatFsOp>(*this, req);
  return op;
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
94
src/meta/store/ops/Symlink.cc
Normal file
94
src/meta/store/ops/Symlink.cc
Normal file
@@ -0,0 +1,94 @@
|
||||
#include <cassert>
|
||||
#include <fcntl.h>
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <memory>
|
||||
|
||||
#include "common/kv/ITransaction.h"
|
||||
#include "common/utils/Coroutine.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "meta/event/Event.h"
|
||||
#include "meta/store/MetaStore.h"
|
||||
#include "meta/store/Operation.h"
|
||||
|
||||
namespace hf3fs::meta::server {
|
||||
|
||||
/** MetaStore::symlink */
|
||||
class SymlinkOp : public Operation<SymlinkRsp> {
|
||||
public:
|
||||
SymlinkOp(MetaStore &meta, const SymlinkReq &req)
|
||||
: Operation<SymlinkRsp>(meta),
|
||||
req_(req) {}
|
||||
|
||||
OPERATION_TAGS(req_);
|
||||
|
||||
CoTryTask<SymlinkRsp> run(IReadWriteTransaction &txn) override {
|
||||
XLOGF(DBG, "SymlinkOp {}", req_);
|
||||
|
||||
CHECK_REQUEST(req_);
|
||||
|
||||
auto resolveResult = co_await resolve(txn, req_.user).path(req_.path, AtFlags(AT_SYMLINK_NOFOLLOW));
|
||||
CO_RETURN_ON_ERROR(resolveResult);
|
||||
if (resolveResult->dirEntry.has_value()) {
|
||||
auto &entry = *resolveResult->dirEntry;
|
||||
if (entry.uuid != Uuid::zero() && entry.uuid == req_.uuid) {
|
||||
// this may happens when FDB returns commit_unknown_result, or we failed to send response to client
|
||||
XLOGF(CRITICAL, "Symlink already created, dst {}, req {}, uuid {}", entry, req_, req_.uuid);
|
||||
auto inode = co_await entry.snapshotLoadInode(txn);
|
||||
CO_RETURN_ON_ERROR(inode);
|
||||
co_return SymlinkRsp(std::move(*inode));
|
||||
}
|
||||
co_return makeError(MetaCode::kExists);
|
||||
}
|
||||
|
||||
// check permission and lock
|
||||
auto parent = co_await resolveResult->getParentInode(txn);
|
||||
CO_RETURN_ON_ERROR(parent);
|
||||
CO_RETURN_ON_ERROR(parent->acl.checkPermission(req_.user, AccessType::WRITE));
|
||||
CO_RETURN_ON_ERROR(parent->asDirectory().checkLock(req_.client));
|
||||
|
||||
auto inodeId = co_await allocateInodeId(txn, false);
|
||||
CO_RETURN_ON_ERROR(inodeId);
|
||||
|
||||
assert(req_.path.path.has_value());
|
||||
InodeId parentId = resolveResult->getParentId();
|
||||
DirEntry entry = DirEntry::newSymlink(parentId, req_.path.path->filename().native(), *inodeId);
|
||||
entry.uuid = req_.uuid;
|
||||
Inode inode = Inode::newSymlink(*inodeId, req_.target, req_.user.uid, req_.user.gid, now());
|
||||
|
||||
// NOTE: add parent inode and dirEntry into read conflict set.
|
||||
// add parent inode into read conflict set to prevent parent is removed concurrently
|
||||
CO_RETURN_ON_ERROR(co_await Inode(parentId).addIntoReadConflict(txn));
|
||||
// add directory entry into read conflict set to prevent concurrent create
|
||||
CO_RETURN_ON_ERROR(co_await entry.addIntoReadConflict(txn));
|
||||
|
||||
// create inode and dirEntry
|
||||
CO_RETURN_ON_ERROR(co_await entry.store(txn));
|
||||
CO_RETURN_ON_ERROR(co_await inode.store(txn));
|
||||
|
||||
addEvent(Event::Type::Symlink)
|
||||
.addField("parent", entry.parent)
|
||||
.addField("name", entry.name)
|
||||
.addField("target", inode.asSymlink().target.native())
|
||||
.addField("user", req_.user.uid)
|
||||
.addField("host", req_.client.hostname);
|
||||
addTrace(MetaEventTrace{
|
||||
.eventType = Event::Type::Symlink,
|
||||
.parentId = entry.parent,
|
||||
.entryName = entry.name,
|
||||
.userId = req_.user.uid,
|
||||
.client = req_.client,
|
||||
.symLinkTarget = inode.asSymlink().target,
|
||||
});
|
||||
|
||||
co_return SymlinkRsp(std::move(inode));
|
||||
}
|
||||
|
||||
private:
|
||||
const SymlinkReq &req_;
|
||||
};
|
||||
|
||||
/** Builds the operation object that serves a symlink request; `req` is held by reference by the operation. */
MetaStore::OpPtr<SymlinkRsp> MetaStore::symlink(const SymlinkReq &req) {
  OpPtr<SymlinkRsp> op = std::make_unique<SymlinkOp>(*this, req);
  return op;
}
|
||||
|
||||
} // namespace hf3fs::meta::server
|
||||
Reference in New Issue
Block a user