Initial commit

This commit is contained in:
dev
2025-02-27 21:53:53 +08:00
commit 815e55e4c0
1291 changed files with 185445 additions and 0 deletions

View File

@@ -0,0 +1,87 @@
#include "MgmtdBackgroundRunner.h"
#include "MgmtdChainsUpdater.h"
#include "MgmtdClientSessionsChecker.h"
#include "MgmtdHeartbeatChecker.h"
#include "MgmtdHeartbeater.h"
#include "MgmtdLeaseExtender.h"
#include "MgmtdMetricsUpdater.h"
#include "MgmtdNewBornChainsChecker.h"
#include "MgmtdRoutingInfoVersionUpdater.h"
#include "MgmtdTargetInfoLoader.h"
#include "MgmtdTargetInfoPersister.h"
#include "common/utils/BackgroundRunner.h"
#include "mgmtd/service/MgmtdConfig.h"
#include "mgmtd/service/MgmtdState.h"
namespace hf3fs::mgmtd {
MgmtdBackgroundRunner::MgmtdBackgroundRunner(MgmtdState &state)
: state_(state) {
if (state_.env_->backgroundExecutor()) {
backgroundRunner_ = std::make_unique<BackgroundRunner>(*state_.env_->backgroundExecutor());
heartbeater_ = std::make_unique<MgmtdHeartbeater>(state_);
leaseExtender_ = std::make_unique<MgmtdLeaseExtender>(state_);
chainsUpdater_ = std::make_unique<MgmtdChainsUpdater>(state_);
clientSessionsChecker_ = std::make_unique<MgmtdClientSessionsChecker>(state_);
heartbeatChecker_ = std::make_unique<MgmtdHeartbeatChecker>(state_);
newBornChainsChecker_ = std::make_unique<MgmtdNewBornChainsChecker>(state_);
routingInfoVersionUpdater_ = std::make_unique<MgmtdRoutingInfoVersionUpdater>(state_);
metricsUpdater_ = std::make_unique<MgmtdMetricsUpdater>(state_);
targetInfoPersister_ = std::make_unique<MgmtdTargetInfoPersister>(state_);
targetInfoLoader_ = std::make_unique<MgmtdTargetInfoLoader>(state_);
}
}
MgmtdBackgroundRunner::~MgmtdBackgroundRunner() {}
void MgmtdBackgroundRunner::start() {
if (backgroundRunner_) {
backgroundRunner_->start(
"extendLease",
[this] { return leaseExtender_->extend(); },
state_.config_.extend_lease_interval_getter());
backgroundRunner_->start(
"checkClientSessions",
[this] { return clientSessionsChecker_->check(); },
state_.config_.check_status_interval_getter());
backgroundRunner_->start(
"checkNewBornChains",
[this] { return newBornChainsChecker_->check(); },
state_.config_.check_status_interval_getter());
backgroundRunner_->start(
"checkHeartbeat",
[this] { return heartbeatChecker_->check(); },
state_.config_.check_status_interval_getter());
backgroundRunner_->start(
"sendHeartbeat",
[this] { return heartbeater_->send(); },
state_.config_.send_heartbeat_interval_getter());
backgroundRunner_->start(
"updateChains",
[this, lastUpdateTs = SteadyClock::now()]() mutable { return chainsUpdater_->update(lastUpdateTs); },
state_.config_.update_chains_interval_getter());
backgroundRunner_->start(
"bumpRoutingInfoVersion",
[this] { return routingInfoVersionUpdater_->update(); },
state_.config_.bump_routing_info_version_interval_getter());
backgroundRunner_->start(
"updateMetrics",
[this] { return metricsUpdater_->update(); },
state_.config_.update_metrics_interval_getter());
backgroundRunner_->start(
"persistTargetInfo",
[this] { return targetInfoPersister_->run(); },
state_.config_.target_info_persist_interval_getter());
backgroundRunner_->start(
"loadTargetInfo",
[this] { return targetInfoLoader_->run(); },
state_.config_.target_info_load_interval_getter());
}
}
CoTask<void> MgmtdBackgroundRunner::stop() {
if (backgroundRunner_) {
co_await backgroundRunner_->stopAll();
}
}
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,48 @@
#pragma once
#include "common/utils/Coroutine.h"
namespace hf3fs {
class BackgroundRunner;
namespace mgmtd {
class MgmtdHeartbeater;
class MgmtdLeaseExtender;
struct MgmtdState;
class MgmtdChainsUpdater;
class MgmtdClientSessionsChecker;
class MgmtdHeartbeatChecker;
class MgmtdNewBornChainsChecker;
class MgmtdRoutingInfoVersionUpdater;
class MgmtdMetricsUpdater;
class MgmtdTargetInfoPersister;
class MgmtdTargetInfoLoader;
namespace testing {
class MgmtdTestHelper;
}
class MgmtdBackgroundRunner {
public:
explicit MgmtdBackgroundRunner(MgmtdState &state);
~MgmtdBackgroundRunner();
void start();
CoTask<void> stop();
private:
MgmtdState &state_;
std::unique_ptr<BackgroundRunner> backgroundRunner_;
std::unique_ptr<MgmtdHeartbeater> heartbeater_;
std::unique_ptr<MgmtdLeaseExtender> leaseExtender_;
std::unique_ptr<MgmtdChainsUpdater> chainsUpdater_;
std::unique_ptr<MgmtdClientSessionsChecker> clientSessionsChecker_;
std::unique_ptr<MgmtdHeartbeatChecker> heartbeatChecker_;
std::unique_ptr<MgmtdNewBornChainsChecker> newBornChainsChecker_;
std::unique_ptr<MgmtdRoutingInfoVersionUpdater> routingInfoVersionUpdater_;
std::unique_ptr<MgmtdMetricsUpdater> metricsUpdater_;
std::unique_ptr<MgmtdTargetInfoPersister> targetInfoPersister_;
std::unique_ptr<MgmtdTargetInfoLoader> targetInfoLoader_;
friend class testing::MgmtdTestHelper;
};
} // namespace mgmtd
} // namespace hf3fs

View File

@@ -0,0 +1,80 @@
#include "MgmtdChainsUpdater.h"
#include "core/utils/ServiceOperation.h"
#include "core/utils/runOp.h"
#include "mgmtd/service/MgmtdState.h"
#include "mgmtd/service/helpers.h"
namespace hf3fs::mgmtd {
namespace {
struct Op : core::ServiceOperationWithMetric<"MgmtdService", "UpdateChains", "bg"> {
String toStringImpl() const final { return "UpdateChains"; }
CoTryTask<SteadyTime> handle(MgmtdState &state, SteadyTime lastUpdateTs, bool lastAdjustTargetOrderFlag) {
auto writerLock = co_await state.coScopedLock<"UpdateChains">();
robin_hood::unordered_set<flat::ChainId> candidateChains;
std::vector<flat::ChainInfo> changedChains;
bool needPromoteRoutingInfoVersion = false;
auto steadyNow = SteadyClock::now();
{
auto dataPtr = co_await state.data_.coSharedLock();
const auto &ri = dataPtr->routingInfo;
for (const auto &[tid, ti] : ri.getTargets()) {
if (ti.ts() > lastUpdateTs) {
candidateChains.insert(ti.base().chainId);
} else if (!lastAdjustTargetOrderFlag && state.config_.try_adjust_target_order_as_preferred()) {
candidateChains.insert(ti.base().chainId);
}
needPromoteRoutingInfoVersion |= ti.importantInfoChangedTime > lastUpdateTs;
}
for (auto chainId : candidateChains) {
dataPtr->appendChangedChains(chainId, changedChains, state.config_.try_adjust_target_order_as_preferred());
}
}
if (changedChains.empty() && !needPromoteRoutingInfoVersion) co_return steadyNow;
auto commitRes =
co_await updateStoredRoutingInfo(state, *this, [&](kv::IReadWriteTransaction &txn) -> CoTryTask<void> {
for (const auto &chain : changedChains) {
CO_RETURN_ON_ERROR(co_await state.store_.storeChainInfo(txn, chain));
}
co_return Void{};
});
CO_RETURN_ON_ERROR(commitRes);
co_await updateMemoryRoutingInfo(state, *this, [&](RoutingInfo &ri) {
ri.applyChainTargetChanges(*this, changedChains, steadyNow);
});
co_return steadyNow;
}
};
} // namespace
MgmtdChainsUpdater::MgmtdChainsUpdater(MgmtdState &state)
: state_(state) {}
CoTask<void> MgmtdChainsUpdater::update(SteadyTime &lastUpdateTs) {
Op op;
auto handler = [&]() -> CoTryTask<SteadyTime> {
CO_INVOKE_OP_INFO(op, "background", state_, lastUpdateTs, lastAdjustTargetOrderFlag);
};
auto res = co_await doAsPrimary(state_, std::move(handler));
if (res.hasError()) {
if (res.error().code() == MgmtdCode::kNotPrimary)
XLOGF(INFO, "Mgmtd: self is not primary, skip updateChains");
else
LOG_OP_ERR(op, "failed: {}", res.error());
} else {
lastUpdateTs = *res;
lastAdjustTargetOrderFlag = state_.config_.try_adjust_target_order_as_preferred();
}
}
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,18 @@
#pragma once
#include "common/utils/Coroutine.h"
#include "common/utils/UtcTime.h"
namespace hf3fs::mgmtd {
struct MgmtdState;
class MgmtdChainsUpdater {
public:
explicit MgmtdChainsUpdater(MgmtdState &state);
CoTask<void> update(SteadyTime &lastUpdateTs);
private:
bool lastAdjustTargetOrderFlag = false;
MgmtdState &state_;
};
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,55 @@
#include "MgmtdClientSessionsChecker.h"
#include "core/utils/ServiceOperation.h"
#include "core/utils/runOp.h"
#include "mgmtd/service/MgmtdState.h"
#include "mgmtd/service/helpers.h"
namespace hf3fs::mgmtd {
namespace {
struct Op : core::ServiceOperationWithMetric<"MgmtdService", "CheckClientSessions", "bg"> {
String toStringImpl() const final { return "CheckClientSessions"; }
auto handle(MgmtdState &state) -> CoTryTask<void> {
auto timeout = state.config_.client_session_timeout().asUs();
auto now = SteadyClock::now();
std::vector<String> timeoutedClients;
{
auto clientSessionMap = co_await state.clientSessionMap_.coSharedLock();
for (const auto &[clientId, sessionData] : *clientSessionMap) {
if (sessionData.ts() + timeout < now) timeoutedClients.push_back(clientId);
}
}
if (!timeoutedClients.empty()) {
LOG_OP_INFO(*this, "remove timeouted client sessions [{}]", fmt::join(timeoutedClients, ","));
auto clientSessionMap = co_await state.clientSessionMap_.coLock();
for (const auto &clientId : timeoutedClients) {
auto it = clientSessionMap->find(clientId);
if (it->second.ts() + timeout < now) {
clientSessionMap->erase(it);
}
}
}
co_return Void{};
}
};
} // namespace
MgmtdClientSessionsChecker::MgmtdClientSessionsChecker(MgmtdState &state)
: state_(state) {}
CoTask<void> MgmtdClientSessionsChecker::check() {
Op op;
auto handler = [&]() -> CoTryTask<void> { CO_INVOKE_OP_INFO(op, "background", state_); };
auto res = co_await doAsPrimary(state_, std::move(handler));
if (res.hasError()) {
if (res.error().code() == MgmtdCode::kNotPrimary)
LOG_OP_INFO(op, "self is not primary, skip");
else
LOG_OP_ERR(op, "failed: {}", res.error());
}
}
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,16 @@
#pragma once
#include "common/utils/Coroutine.h"
namespace hf3fs::mgmtd {
struct MgmtdState;
class MgmtdClientSessionsChecker {
public:
explicit MgmtdClientSessionsChecker(MgmtdState &state);
CoTask<void> check();
private:
MgmtdState &state_;
};
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,98 @@
#include "MgmtdHeartbeatChecker.h"
#include "core/utils/ServiceOperation.h"
#include "core/utils/runOp.h"
#include "mgmtd/service/MgmtdState.h"
#include "mgmtd/service/helpers.h"
namespace hf3fs::mgmtd {
namespace {
flat::NodeInfo onNodeFailed(const flat::NodeInfo &oldNodeInfo, UtcTime) {
auto sn = oldNodeInfo;
sn.status = flat::NodeStatus::HEARTBEAT_FAILED;
return sn;
}
struct Op : core::ServiceOperationWithMetric<"MgmtdService", "CheckHeartbeat", "bg"> {
String toStringImpl() const final { return "CheckHeartbeat"; }
auto handle(MgmtdState &state) -> CoTryTask<void> {
auto heartbeatFailInterval = state.config_.heartbeat_fail_interval().asUs();
auto start = state.utcNow();
auto writerLock = co_await state.coScopedLock<"CheckHeartbeat">();
auto steadyNow = SteadyClock::now();
std::vector<flat::NodeId> timeoutedNodeIds;
std::vector<flat::NodeInfo> timeoutedNodes;
std::vector<flat::TargetId> candidateTargetIds;
{
auto dataPtr = co_await state.data_.coSharedLock();
const auto &ri = dataPtr->routingInfo;
for (const auto &[nodeId, nodeInfo] : ri.nodeMap) {
switch (nodeInfo.base().status) {
case flat::NodeStatus::DISABLED:
case flat::NodeStatus::HEARTBEAT_FAILED:
case flat::NodeStatus::PRIMARY_MGMTD:
break; // do nothing
default:
if (nodeInfo.ts() + heartbeatFailInterval < steadyNow) {
timeoutedNodeIds.push_back(nodeId);
timeoutedNodes.push_back(onNodeFailed(nodeInfo.base(), start));
}
}
}
for (const auto &[tid, ti] : ri.getTargets()) {
if (ti.base().localState != flat::LocalTargetState::OFFLINE && ti.ts() + heartbeatFailInterval < steadyNow) {
candidateTargetIds.push_back(tid);
}
}
if (timeoutedNodes.empty() && candidateTargetIds.empty()) co_return Void{};
LOG_OP_INFO(*this,
"found timeouted nodes:[{}] targets:[{}]",
fmt::join(timeoutedNodeIds, ","),
fmt::join(candidateTargetIds, ","));
}
if (!candidateTargetIds.empty() || !timeoutedNodes.empty()) {
auto dataPtr = co_await state.data_.coLock();
auto &ri = dataPtr->routingInfo;
auto steadyNow = SteadyClock::now();
for (auto tid : candidateTargetIds) {
ri.updateTarget(tid, [steadyNow](auto &ti) {
ti.base().localState = flat::LocalTargetState::OFFLINE;
ti.updateTs(steadyNow);
});
}
for (auto &info : timeoutedNodes) ri.nodeMap[info.app.nodeId].base() = std::move(info);
ri.routingInfoChanged = true;
}
co_return Void{};
}
};
} // namespace
MgmtdHeartbeatChecker::MgmtdHeartbeatChecker(MgmtdState &state)
: state_(state) {}
CoTask<void> MgmtdHeartbeatChecker::check() {
Op op;
auto handler = [&]() -> CoTryTask<void> { CO_INVOKE_OP_INFO(op, "background", state_); };
auto res = co_await doAsPrimary(state_, std::move(handler));
if (res.hasError()) {
if (res.error().code() == MgmtdCode::kNotPrimary)
LOG_OP_INFO(op, "self is not primary, skip");
else
LOG_OP_ERR(op, "failed: {}", res.error());
}
}
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,16 @@
#pragma once
#include "common/utils/Coroutine.h"
namespace hf3fs::mgmtd {
struct MgmtdState;
class MgmtdHeartbeatChecker {
public:
explicit MgmtdHeartbeatChecker(MgmtdState &state);
CoTask<void> check();
private:
MgmtdState &state_;
};
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,93 @@
#include "MgmtdHeartbeater.h"
#include "common/app/ApplicationBase.h"
#include "core/utils/ServiceOperation.h"
#include "core/utils/runOp.h"
#include "mgmtd/service/MgmtdState.h"
#include "mgmtd/service/helpers.h"
namespace hf3fs::mgmtd {
struct MgmtdHeartbeater::SendHeartbeatContext {
flat::MgmtdLeaseInfo lease;
std::unique_ptr<MgmtdStub> stub;
flat::HeartbeatInfo info;
};
namespace {
struct Op : core::ServiceOperationWithMetric<"MgmtdService", "SendHeartbeat", "bg"> {
String toStringImpl() const final { return "SendHeartbeat"; }
auto handle(MgmtdState &state, MgmtdHeartbeater::SendHeartbeatContext &sendHeartbeatCtx) -> CoTryTask<void> {
auto &info = sendHeartbeatCtx.info;
info.configStatus = ApplicationBase::getConfigStatus();
auto res = co_await sendHeartbeatCtx.stub->heartbeat(
HeartbeatReq::create(state.env_->appInfo().clusterId, info, UtcClock::now()));
if (res.hasError()) {
if (res.error().code() == MgmtdCode::kHeartbeatVersionStale) {
uint64_t v = 0;
auto result = scn::scan(String(res.error().message()), "{}", v);
if (result) {
info.hbVersion = flat::HeartbeatVersion(v + 1);
} else {
info.hbVersion = flat::HeartbeatVersion(info.hbVersion + 1);
}
}
CO_RETURN_ERROR(res);
} else {
if (res->config) {
auto r = updateSelfConfig(state, *res->config);
if (r.hasError()) {
LOG_OP_WARN(*this, "Update config failed: {}. version: {}", r.error(), res->config->configVersion);
} else {
info.configVersion = res->config->configVersion;
LOG_OP_INFO(*this, "Update config succeeded and promote config version to {}", res->config->configVersion);
}
}
info.hbVersion = nextVersion(info.hbVersion);
co_return Void{};
}
}
};
} // namespace
MgmtdHeartbeater::MgmtdHeartbeater(MgmtdState &state)
: state_(state) {}
MgmtdHeartbeater::~MgmtdHeartbeater() {}
CoTask<void> MgmtdHeartbeater::send() {
Op op;
if (!state_.config_.send_heartbeat()) {
sendHeartbeatCtx_.reset();
LOG_OP_DBG(op, "disabled, skip");
co_return;
}
auto start = state_.utcNow();
auto lease = co_await state_.currentLease(start);
if (!lease.has_value()) {
sendHeartbeatCtx_.reset();
LOG_OP_DBG(op, "primary not found, skip");
co_return;
} else if (lease->primary.nodeId == state_.selfId()) {
sendHeartbeatCtx_.reset();
LOG_OP_DBG(op, "self is primary, skip");
co_return;
}
if (!sendHeartbeatCtx_ || sendHeartbeatCtx_->lease.leaseStart != lease->leaseStart) {
auto addrs = flat::extractAddresses(lease->primary.serviceGroups, "Mgmtd");
if (addrs.empty()) {
LOG_OP_ERR(op, "primary({}) addr not found, skip", lease->primary.nodeId);
co_return;
}
sendHeartbeatCtx_ = std::make_unique<SendHeartbeatContext>();
sendHeartbeatCtx_->lease = *lease;
// TODO: consider reuse some facilities of MgmtdClient for auto switching addresses
sendHeartbeatCtx_->stub = state_.env_->mgmtdStubFactory()->create(addrs[0]);
sendHeartbeatCtx_->info = flat::HeartbeatInfo(state_.env_->appInfo(), flat::MgmtdHeartbeatInfo{});
}
co_await [&]() -> CoTryTask<void> { CO_INVOKE_OP_INFO(op, "background", state_, *sendHeartbeatCtx_); }();
}
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,21 @@
#pragma once
#include "common/utils/Coroutine.h"
namespace hf3fs::mgmtd {
struct MgmtdState;
class MgmtdHeartbeater {
public:
explicit MgmtdHeartbeater(MgmtdState &state);
~MgmtdHeartbeater();
CoTask<void> send();
struct SendHeartbeatContext;
private:
MgmtdState &state_;
std::unique_ptr<SendHeartbeatContext> sendHeartbeatCtx_;
};
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,269 @@
#include "MgmtdLeaseExtender.h"
#include <folly/experimental/coro/Collect.h>
#include "common/utils/StringUtils.h"
#include "core/utils/ServiceOperation.h"
#include "core/utils/runOp.h"
#include "fbs/mgmtd/NodeConversion.h"
#include "mgmtd/service/MgmtdState.h"
#include "mgmtd/service/helpers.h"
namespace hf3fs::mgmtd {
namespace {
void addNode(NodeMap &allNodes, flat::NodeInfo nodeInfo) {
auto &node = allNodes[nodeInfo.app.nodeId];
node.base() = std::move(nodeInfo);
node.recordStatusChange(node.base().status, UtcClock::now());
}
CoTryTask<void> loadRoutingInfoVersion(core::ServiceOperation &ctx,
MgmtdStore &store,
kv::IReadWriteTransaction &txn,
flat::RoutingInfoVersion &newVersion) {
auto res = co_await store.loadRoutingInfoVersion(txn);
CO_RETURN_ON_ERROR(res);
LOG_OP_INFO(ctx, "Load routingInfoVersion {}", *res);
newVersion = nextVersion(*res);
co_return Void{};
}
CoTryTask<void> persistNewVersionAndSelfInfo(core::ServiceOperation &ctx,
MgmtdStore &store,
kv::IReadWriteTransaction &txn,
flat::NodeInfo &selfNodeInfo,
flat::PersistentNodeInfo &selfPersistentNodeInfo,
flat::RoutingInfoVersion newVersion,
NodeMap &allNodes) {
CO_RETURN_ON_ERROR(co_await store.storeRoutingInfoVersion(txn, newVersion));
bool needPersistSelf = true;
auto it = allNodes.find(selfNodeInfo.app.nodeId);
if (it != allNodes.end()) {
XLOGF_IF(FATAL,
it->second.base().type != flat::NodeType::MGMTD,
"Unexpected self type: {}",
toString(it->second.base().type));
auto persisted = flat::toPersistentNode(it->second.base());
selfNodeInfo.tags = persisted.tags;
selfPersistentNodeInfo.tags = persisted.tags;
if (persisted == selfPersistentNodeInfo) {
needPersistSelf = false;
}
}
if (needPersistSelf) {
CO_RETURN_ON_ERROR(co_await store.storeNodeInfo(txn, selfPersistentNodeInfo));
}
allNodes[selfNodeInfo.app.nodeId].base() = selfNodeInfo;
co_return Void{};
}
CoTryTask<void> loadAllNodes(core::ServiceOperation &ctx,
MgmtdStore &store,
kv::IReadWriteTransaction &txn,
NodeMap &allNodes) {
auto loadRes = co_await store.loadAllNodes(txn);
CO_RETURN_ON_ERROR(loadRes);
LOG_OP_INFO(ctx, "Load {} nodes", loadRes->size());
for (auto &sn : *loadRes) {
addNode(allNodes, flat::toNode(sn));
}
co_return Void{};
}
CoTryTask<void> loadAllConfigs(core::ServiceOperation &ctx,
MgmtdStore &store,
kv::IReadOnlyTransaction &txn,
ConfigMap &configMap) {
auto loadRes = co_await store.loadAllConfigs(txn);
CO_RETURN_ON_ERROR(loadRes);
for (auto &[type, info] : *loadRes) {
LOG_OP_INFO(ctx, "Load config {} version {}", magic_enum::enum_name(type), info.configVersion);
auto ver = info.configVersion;
configMap[type][ver] = std::move(info);
}
co_return Void{};
}
CoTryTask<void> loadAllChainTables(core::ServiceOperation &ctx,
MgmtdStore &store,
kv::IReadOnlyTransaction &txn,
ChainTableMap &chainTables) {
auto loadRes = co_await store.loadAllChainTables(txn);
CO_RETURN_ON_ERROR(loadRes);
for (auto &info : *loadRes) {
chainTables[info.chainTableId][info.chainTableVersion] = info;
}
co_return Void{};
}
CoTryTask<void> loadAllChains(core::ServiceOperation &ctx,
MgmtdStore &store,
kv::IReadOnlyTransaction &txn,
ChainMap &chains,
TargetMap &targetMap) {
auto loadRes = co_await store.loadAllChains(txn);
CO_RETURN_ON_ERROR(loadRes);
auto steadyNow = SteadyClock::now();
for (auto &info : *loadRes) {
for (const auto &t : info.targets) {
targetMap[t.targetId] = TargetInfo(makeTargetInfo(info.chainId, t), steadyNow);
}
chains[info.chainId] = info;
}
co_return Void{};
}
CoTryTask<void> loadAllUniversalTags(core::ServiceOperation &ctx,
MgmtdStore &store,
kv::IReadOnlyTransaction &txn,
UniversalTagsMap &universalTagsMap) {
auto loadRes = co_await store.loadAllUniversalTags(txn);
CO_RETURN_ON_ERROR(loadRes);
for (auto &[id, tags] : *loadRes) {
universalTagsMap[id] = std::move(tags);
}
co_return Void{};
}
CoTryTask<void> onNewLease(MgmtdState &state, core::ServiceOperation &ctx, MgmtdData &data, UtcTime start) {
flat::RoutingInfoVersion newRoutingInfoVersion{1};
NodeMap allNodes;
ConfigMap allConfigs;
magic_enum::enum_for_each<flat::NodeType>(
[&](auto type) { allConfigs[type][flat::ConfigVersion(0)] = flat::ConfigInfo{}; });
ChainTableMap chainTables;
ChainMap chains;
TargetMap targetMap;
UniversalTagsMap universalTagsMap;
auto handler = [&](kv::IReadWriteTransaction &txn) -> CoTryTask<void> {
auto [rivRes, nodesRes, configsRes, chainTablesRes, chainsRes, utagsRes] =
co_await folly::coro::collectAll(loadRoutingInfoVersion(ctx, state.store_, txn, newRoutingInfoVersion),
loadAllNodes(ctx, state.store_, txn, allNodes),
loadAllConfigs(ctx, state.store_, txn, allConfigs),
loadAllChainTables(ctx, state.store_, txn, chainTables),
loadAllChains(ctx, state.store_, txn, chains, targetMap),
loadAllUniversalTags(ctx, state.store_, txn, universalTagsMap));
CO_RETURN_ON_ERROR(rivRes);
CO_RETURN_ON_ERROR(nodesRes);
CO_RETURN_ON_ERROR(configsRes);
CO_RETURN_ON_ERROR(chainTablesRes);
CO_RETURN_ON_ERROR(chainsRes);
CO_RETURN_ON_ERROR(utagsRes);
// TODO: check routingInfo integrity
co_return co_await persistNewVersionAndSelfInfo(ctx,
state.store_,
txn,
state.selfNodeInfo_,
state.selfPersistentNodeInfo_,
newRoutingInfoVersion,
allNodes);
};
auto loadRes = co_await withReadWriteTxn(state, std::move(handler));
if (!loadRes.hasError()) {
XLOGF_IF(FATAL, !allConfigs.contains(flat::NodeType::MGMTD), "Found no config for MGMTD");
const auto &cfg = allConfigs[flat::NodeType::MGMTD].rbegin()->second;
if (cfg.configVersion && updateSelfConfig(state, cfg)) {
state.selfNodeInfo_.configVersion = cfg.configVersion;
allNodes[state.selfId()].base().configVersion = cfg.configVersion;
}
data.reset(newRoutingInfoVersion,
std::move(allNodes),
std::move(allConfigs),
std::move(chainTables),
std::move(chains),
std::move(targetMap),
std::move(universalTagsMap));
auto clientSessionMap = co_await state.clientSessionMap_.coLock();
clientSessionMap->clear();
co_return Void{};
} else {
// retry in next round
CO_RETURN_ERROR(loadRes);
}
}
struct Op : core::ServiceOperationWithMetric<"MgmtdService", "ExtendLease", "bg"> {
String toStringImpl() const final { return "ExtendLease"; }
auto handle(MgmtdState &state) -> CoTryTask<void> {
auto handler = [&](kv::IReadWriteTransaction &txn) -> CoTryTask<std::optional<flat::MgmtdLeaseInfo>> {
auto checkSelfRes = co_await state.store_.loadNodeInfo(txn, state.selfId());
CO_RETURN_ON_ERROR(checkSelfRes);
if (checkSelfRes->has_value() && flat::findTag(checkSelfRes->value().tags, flat::kDisabledTagKey) != -1) {
co_return std::nullopt;
}
auto extendRes = co_await state.store_.extendLease(txn,
state.selfPersistentNodeInfo_,
state.config_.lease_length().asUs(),
state.utcNow(),
flat::ReleaseVersion::fromVersionInfo(),
state.config_.extend_lease_check_release_version());
CO_RETURN_ON_ERROR(extendRes);
co_return *extendRes;
};
auto commitRes = co_await withReadWriteTxn(state, std::move(handler), /*expectSelfPrimary=*/false);
if (commitRes.hasError()) {
co_return makeError(commitRes.error().code(), fmt::format("failed to commit: {}", commitRes.error().message()));
}
auto writerLock = co_await state.coScopedLock<"ExtendLease">();
auto start = state.utcNow();
auto dataPtr = co_await state.data_.coLock();
if (!commitRes->has_value()) {
LOG_OP_INFO(*this, "self is disabled, release lease");
dataPtr->lease.lease.reset();
co_return Void{};
}
const auto &newLease = commitRes->value();
auto &leaseInfo = dataPtr->lease;
String leaseChangedReason = [&] {
if (!leaseInfo.lease.has_value()) return "prev no lease";
if (leaseInfo.lease->primary.nodeId != newLease.primary.nodeId) return "primary id changed";
if (leaseInfo.lease->leaseStart != newLease.leaseStart) return "lease start changed";
if (leaseInfo.bootstrapping) return "still bootstrapping";
return "";
}();
leaseInfo.lease = newLease;
leaseInfo.bootstrapping = newLease.primary.nodeId == state.selfId() && !leaseChangedReason.empty();
if (leaseChangedReason.empty()) {
LOG_OP_DBG(*this, "lease not changed");
} else {
LOG_OP_INFO(*this,
"newLease({}-{}-{}) changed({})",
newLease.primary.nodeId,
newLease.leaseStart.toMicroseconds(),
newLease.leaseEnd.toMicroseconds(),
leaseChangedReason);
}
if (leaseInfo.bootstrapping) {
LOG_OP_INFO(*this, "self got lease, loading ...");
co_return co_await onNewLease(state, *this, *dataPtr, start);
}
co_return Void{};
}
};
} // namespace
MgmtdLeaseExtender::MgmtdLeaseExtender(MgmtdState &state)
: state_(state) {}
CoTask<void> MgmtdLeaseExtender::extend() {
Op op;
co_await [&]() -> CoTryTask<void> { CO_INVOKE_OP_INFO(op, "background", state_); }();
}
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,16 @@
#pragma once
#include "common/utils/Coroutine.h"
namespace hf3fs::mgmtd {
struct MgmtdState;
class MgmtdLeaseExtender {
public:
explicit MgmtdLeaseExtender(MgmtdState &state);
CoTask<void> extend();
private:
MgmtdState &state_;
};
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,278 @@
#include "MgmtdMetricsUpdater.h"
#include "common/utils/StringUtils.h"
#include "core/utils/ServiceOperation.h"
#include "core/utils/runOp.h"
#include "mgmtd/service/MgmtdState.h"
#include "mgmtd/service/helpers.h"
namespace hf3fs::mgmtd {
namespace {
template <NameWrapper MetricName, typename Map>
void recordCount(const Map &counts, auto &&instanceF, auto &&tagF) {
using Key = typename Map::key_type;
static std::map<Key, monitor::ValueRecorder> recorders;
auto func = [&](auto &&f, const Key &k) {
if constexpr (is_specialization<Key, std::tuple>) {
return std::apply(f, k);
} else {
return f(k);
}
};
for (const auto &[k, v] : counts) {
auto it = recorders.find(k);
if (it == recorders.end()) {
auto instance = func(std::forward<decltype(instanceF)>(instanceF), k);
auto tag = func(std::forward<decltype(tagF)>(tagF), k);
std::optional<monitor::TagSet> tagSet;
if (!instance.empty() || !tag.empty()) {
tagSet = monitor::TagSet();
if (!instance.empty()) tagSet->addTag("instance", instance);
if (!tag.empty()) tagSet->addTag("tag", tag);
}
it = recorders.try_emplace(k, String(MetricName), std::move(tagSet)).first;
}
it->second.set(v);
}
}
auto emptyString = [](auto &&...) -> String { return ""; };
struct Op : core::ServiceOperationWithMetric<"MgmtdService", "UpdateMetrics", "bg"> {
String toStringImpl() const final { return "UpdateMetrics"; }
auto handle(MgmtdState &state) -> CoTryTask<void> {
auto dataPtr = co_await state.data_.coSharedLock();
auto clientSessionMap = co_await state.clientSessionMap_.coSharedLock();
auto bootstrapping = dataPtr->leaseStartTs + state.config_.bootstrapping_length() > SteadyClock::now();
reportNodeStatus(*dataPtr);
reportConfigStatus(*dataPtr, *clientSessionMap);
reportChainByReplicaCountStatus(*dataPtr);
reportTargetCount(*dataPtr, bootstrapping);
reportRoutingInfoVersion(*dataPtr);
reportConfigVersions(*dataPtr);
reportLeaseLasting(*dataPtr);
reportReleaseVersionStatus(*dataPtr, *clientSessionMap);
reportChainStatus(*dataPtr);
reportOrphanTargets(*dataPtr);
reportChainTargetCount(*dataPtr);
co_return Void{};
}
void reportNodeStatus(const MgmtdData &data) {
std::map<std::tuple<flat::NodeType, flat::NodeStatus>, int64_t> counts;
std::map<std::tuple<flat::NodeType, flat::NodeId>, int64_t> nodeStatuses;
for (const auto &[_, ni] : data.routingInfo.nodeMap) {
auto &base = ni.base();
++counts[std::make_tuple(base.type, base.status)];
auto key = std::make_tuple(base.type, base.app.nodeId);
nodeStatuses[key] = static_cast<int64_t>(base.status) + 1; // because PRIMARY_MGMTD = 0
}
recordCount<"MgmtdService.NodeStatusCount">(
counts,
[](auto type, auto) { return hf3fs::toString(type); },
[](auto, auto status) { return hf3fs::toString(status); });
recordCount<"MgmtdService.NodeStatus">(
nodeStatuses,
[](auto type, auto id) { return fmt::format("{}_{}", toStringView(type), id.toUnderType()); },
emptyString);
}
void reportConfigStatus(const MgmtdData &data, const ClientSessionMap &clientSessionMap) {
std::map<std::tuple<flat::NodeType, ConfigStatus>, int64_t> counts;
auto add = [&](auto type, auto version, auto status) {
if (version == 0) {
status = ConfigStatus::UNKNOWN;
} else {
auto latestVersion = data.getLatestConfigVersion(type);
assert(version <= latestVersion);
if (version != latestVersion) {
status = ConfigStatus::STALE;
}
}
++counts[std::make_tuple(type, status)];
};
for (const auto &[_, ni] : data.routingInfo.nodeMap) {
auto &base = ni.base();
add(base.type, base.configVersion, base.configStatus);
}
for (const auto &[_, cs] : clientSessionMap) {
auto &base = cs.base();
add(base.type, base.configVersion, base.configStatus);
}
recordCount<"MgmtdService.ConfigStatusCount">(
counts,
[](auto type, auto) { return hf3fs::toString(type); },
[](auto, auto status) { return hf3fs::toString(status); });
}
void reportChainByReplicaCountStatus(const MgmtdData &data) {
std::map<size_t, int64_t> counts;
for (const auto &[_, ci] : data.routingInfo.chains) {
auto rc = ci.targets.size();
++counts[rc];
}
recordCount<"MgmtdService.ChainCountByReplica">(
counts,
[](auto count) { return std::to_string(count); },
emptyString);
}
void reportChainTargetCount(const MgmtdData &data) {
static monitor::ValueRecorder chainCount("MgmtdService.ChainCount");
static monitor::ValueRecorder targetCount("MgmtdService.TargetCount");
chainCount.set(data.routingInfo.chains.size());
targetCount.set(data.routingInfo.getTargets().size());
}
void reportTargetCount(const MgmtdData &data, bool bootstrapping) {
using PS = flat::PublicTargetState;
using LS = flat::LocalTargetState;
std::map<std::tuple<PS, LS>, int64_t> counts;
std::map<std::tuple<flat::ChainId, flat::TargetId>, int64_t> abnormalChains;
for (const auto &[cid, ci] : data.routingInfo.chains) {
bool abnormal = false;
for (const auto &[tid, _] : ci.targets) {
const auto &ti = data.routingInfo.getTargets().at(tid);
auto ps = ti.base().publicState;
auto ls = ti.base().localState;
++counts[std::make_tuple(ps, ls)];
abnormal |= (!bootstrapping && ps != PS::SERVING);
}
if (abnormal) {
for (const auto &cti : ci.targets) {
abnormalChains[std::make_tuple(cid, cti.targetId)] = static_cast<int64_t>(cti.publicState);
}
}
}
recordCount<"MgmtdService.TargetStatusCount">(
counts,
[](auto ps, auto) { return hf3fs::toString(ps); },
[](auto, auto ls) { return hf3fs::toString(ls); });
recordCount<"MgmtdService.AbnormalChainStatus">(
abnormalChains,
[](auto cid, auto) { return std::to_string(cid); },
[](auto, auto tid) { return std::to_string(tid); });
}
void reportRoutingInfoVersion(const MgmtdData &data) {
static monitor::ValueRecorder recorder("MgmtdService.RoutingInfoVersion");
recorder.set(data.routingInfo.routingInfoVersion);
}
void reportConfigVersions(const MgmtdData &data) {
std::map<flat::NodeType, int64_t> versions;
for (const auto &[k, vm] : data.configMap) {
auto ver = vm.rbegin()->first;
versions[k] = ver;
}
recordCount<"MgmtdService.ConfigVersions">(
versions,
[](auto type) { return hf3fs::toString(type); },
emptyString);
}
void reportLeaseLasting(const MgmtdData &data) {
static monitor::ValueRecorder recorder("MgmtdService.LeaseLasting");
auto lasting = Duration(SteadyClock::now() - data.leaseStartTs);
recorder.set(lasting.count());
}
void reportReleaseVersionStatus(const MgmtdData &data, const ClientSessionMap &clientSessionMap) {
constexpr auto invalidType = static_cast<flat::NodeType>(255U);
std::map<std::tuple<flat::NodeType, flat::ReleaseVersion>, int64_t> counts;
auto activeNode = flat::selectActiveNode();
for (const auto &[_, ni] : data.routingInfo.nodeMap) {
if (!activeNode(ni.base())) continue;
++counts[std::make_tuple(ni.base().type, ni.base().app.releaseVersion)];
++counts[std::make_tuple(invalidType, ni.base().app.releaseVersion)];
}
for (const auto &[_, cs] : clientSessionMap) {
++counts[std::make_tuple(cs.base().type, cs.base().releaseVersion)];
}
recordCount<"MgmtdService.ReleaseVersionCount">(
counts,
[](auto type, auto) { return type == invalidType ? "SERVER" : hf3fs::toString(type); },
[](auto, auto rv) { return fmt::format("{}", rv); });
}
void reportChainStatus(const MgmtdData &data) {
using PS = flat::PublicTargetState;
std::map<std::tuple<size_t, String>, int64_t> counts;
for (const auto &[_, ci] : data.routingInfo.chains) {
auto replicaCount = ci.targets.size();
size_t serving = 0, syncing = 0;
for (const auto &ti : ci.targets) {
switch (ti.publicState) {
case PS::SERVING:
++serving;
break;
case PS::SYNCING:
++syncing;
break;
default:
break;
}
}
String status = [&] {
if (serving == replicaCount)
return "SERVING-FULL";
else if (serving)
return "SERVING-PARTIAL";
else if (syncing)
return "SYNCING";
else
return "UNAVAILABLE";
}();
++counts[std::make_tuple(replicaCount, status)];
}
recordCount<"MgmtdService.ChainStatus">(
counts,
[](auto replicaCount, auto) { return std::to_string(replicaCount); },
[](auto, auto status) { return status; });
}
void reportOrphanTargets(const MgmtdData &data) {
static constexpr auto invalidNodeId = flat::NodeId(0);
std::map<flat::NodeId, int64_t> counts;
std::map<flat::NodeId, int64_t> usedSizes;
for (const auto &[tid, ti] : data.routingInfo.orphanTargetsByTargetId) {
XLOGF_IF(FATAL, !ti.nodeId, "Orphan targets should always have nodeId. ti: {}", serde::toJsonString(ti));
auto nodeId = *ti.nodeId;
++counts[nodeId];
++counts[invalidNodeId];
usedSizes[nodeId] += ti.usedSize;
usedSizes[invalidNodeId] += ti.usedSize;
}
recordCount<"MgmtdService.OrphanTargetCount">(
counts,
[](auto nodeId) { return nodeId == invalidNodeId ? "" : std::to_string(nodeId); },
emptyString);
}
};
} // namespace
MgmtdMetricsUpdater::MgmtdMetricsUpdater(MgmtdState &state)
: state_(state) {}
CoTask<void> MgmtdMetricsUpdater::update() {
Op op;
auto handler = [&]() -> CoTryTask<void> { CO_INVOKE_OP_INFO(op, "background", state_); };
auto res = co_await doAsPrimary(state_, std::move(handler));
if (res.hasError()) {
if (res.error().code() == MgmtdCode::kNotPrimary)
LOG_OP_INFO(op, "self is not primary, skip");
else
LOG_OP_ERR(op, "failed: {}", res.error());
}
}
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,16 @@
#pragma once
#include "common/utils/Coroutine.h"
namespace hf3fs::mgmtd {
struct MgmtdState;
class MgmtdMetricsUpdater {
public:
explicit MgmtdMetricsUpdater(MgmtdState &state);
CoTask<void> update();
private:
MgmtdState &state_;
};
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,71 @@
#include "MgmtdNewBornChainsChecker.h"
#include "core/utils/ServiceOperation.h"
#include "core/utils/runOp.h"
#include "mgmtd/service/MgmtdState.h"
#include "mgmtd/service/helpers.h"
namespace hf3fs::mgmtd {
namespace {
struct Op : core::ServiceOperationWithMetric<"MgmtdService", "CheckNewBornChains", "bg"> {
String toStringImpl() const final { return "CheckNewBornChains"; }
auto handle(MgmtdState &state) -> CoTryTask<void> {
auto bootstrapInterval = state.config_.new_chain_bootstrap_interval().asUs();
SteadyTime leaseStartTs;
std::vector<flat::ChainId> candidates;
auto steadyNow = SteadyClock::now();
{
auto dataPtr = co_await state.data_.coSharedLock();
leaseStartTs = dataPtr->leaseStartTs;
const auto &ri = dataPtr->routingInfo;
for (const auto &[chainId, bornTime] : ri.newBornChains) {
if (bornTime + bootstrapInterval <= steadyNow) {
candidates.push_back(chainId);
}
}
}
if (!candidates.empty()) {
LOG_OP_DBG(*this, "candidates {}", fmt::join(candidates, ","));
auto dataPtr = co_await state.data_.coLock();
if (leaseStartTs != dataPtr->leaseStartTs) {
LOG_OP_DBG(*this, "lease changed, skip this round");
co_return Void{};
}
auto steadyNow = SteadyClock::now();
auto &ri = dataPtr->routingInfo;
for (auto cid : candidates) {
ri.newBornChains.erase(cid);
const auto &chain = dataPtr->routingInfo.getChain(cid);
for (const auto &cti : chain.targets) {
dataPtr->routingInfo.updateTarget(cti.targetId, [&](auto &ti) { ti.updateTs(steadyNow); });
}
}
}
co_return Void{};
}
};
} // namespace
MgmtdNewBornChainsChecker::MgmtdNewBornChainsChecker(MgmtdState &state)
: state_(state) {}
CoTask<void> MgmtdNewBornChainsChecker::check() {
Op op;
auto handler = [&]() -> CoTryTask<void> { CO_INVOKE_OP_INFO(op, "background", state_); };
auto res = co_await doAsPrimary(state_, std::move(handler));
if (res.hasError()) {
if (res.error().code() == MgmtdCode::kNotPrimary)
LOG_OP_INFO(op, "self is not primary, skip");
else
LOG_OP_ERR(op, "failed: {}", res.error());
}
}
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,16 @@
#pragma once
#include "common/utils/Coroutine.h"
namespace hf3fs::mgmtd {
struct MgmtdState;
class MgmtdNewBornChainsChecker {
public:
explicit MgmtdNewBornChainsChecker(MgmtdState &state);
CoTask<void> check();
private:
MgmtdState &state_;
};
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,44 @@
#include "MgmtdRoutingInfoVersionUpdater.h"
#include "core/utils/ServiceOperation.h"
#include "core/utils/runOp.h"
#include "mgmtd/service/MgmtdState.h"
#include "mgmtd/service/helpers.h"
namespace hf3fs::mgmtd {
namespace {
struct Op : core::ServiceOperationWithMetric<"MgmtdService", "BumpRoutingInfoVersion", "bg"> {
String toStringImpl() const final { return "BumpRoutingInfoVersion"; }
auto handle(MgmtdState &state) -> CoTryTask<void> {
auto writerLock = co_await state.coScopedLock<"BumpRoutingInfoVersion">();
bool needChange = co_await [&]() -> CoTask<bool> {
auto dataPtr = co_await state.data_.coSharedLock();
co_return dataPtr->routingInfo.routingInfoChanged;
}();
if (needChange) {
CO_RETURN_ON_ERROR(co_await updateStoredRoutingInfo(state, *this));
co_await updateMemoryRoutingInfo(state, *this);
}
co_return Void{};
}
};
} // namespace
MgmtdRoutingInfoVersionUpdater::MgmtdRoutingInfoVersionUpdater(MgmtdState &state)
: state_(state) {}
CoTask<void> MgmtdRoutingInfoVersionUpdater::update() {
Op op;
auto handler = [&]() -> CoTryTask<void> { CO_INVOKE_OP_INFO(op, "background", state_); };
auto res = co_await doAsPrimary(state_, std::move(handler));
if (res.hasError()) {
if (res.error().code() == MgmtdCode::kNotPrimary)
LOG_OP_INFO(op, "self is not primary, skip");
else
LOG_OP_ERR(op, "failed: {}", res.error());
}
}
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,16 @@
#pragma once
#include "common/utils/Coroutine.h"
namespace hf3fs::mgmtd {
struct MgmtdState;
class MgmtdRoutingInfoVersionUpdater {
public:
explicit MgmtdRoutingInfoVersionUpdater(MgmtdState &state);
CoTask<void> update();
private:
MgmtdState &state_;
};
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,120 @@
#include "MgmtdTargetInfoLoader.h"
#include "common/utils/OptionalUtils.h"
#include "core/utils/ServiceOperation.h"
#include "core/utils/runOp.h"
#include "mgmtd/service/MgmtdState.h"
#include "mgmtd/service/helpers.h"
namespace hf3fs::mgmtd {
namespace {
#define OP_NAME "LoadTargetInfo"
struct Op : core::ServiceOperationWithMetric<"MgmtdService", OP_NAME, "bg"> {
Op(SteadyTime &loadedLeaseStart)
: loadedLeaseStart_(loadedLeaseStart) {}
String toStringImpl() const final { return OP_NAME; }
auto handle(MgmtdState &state) -> CoTryTask<void> {
{
if ((co_await ensureSelfIsPrimary(state)).hasError()) {
co_return Void{};
}
auto dataPtr = co_await state.data_.coSharedLock();
if (loadedLeaseStart_ >= dataPtr->leaseStartTs) {
co_return Void{};
}
loadedLeaseStart_ = dataPtr->leaseStartTs;
}
flat::TargetId startTid(0);
for (;;) {
auto handler = [&](kv::IReadOnlyTransaction &txn) -> CoTryTask<std::vector<flat::TargetInfo>> {
return state.store_.loadTargetsFrom(txn, startTid);
};
auto res = co_await withReadOnlyTxn(state, std::move(handler));
CO_RETURN_ON_ERROR(res);
LOG_OP_DBG(*this, "load {} targets starting from {}", res->size(), startTid);
if (res->empty()) break;
startTid = flat::TargetId(res->back().targetId + 1);
if ((co_await ensureSelfIsPrimary(state)).hasError()) {
co_return Void{};
}
auto dataPtr = co_await state.data_.coLock();
if (loadedLeaseStart_ < dataPtr->leaseStartTs) {
co_return Void{};
}
for (auto &loaded : *res) {
auto tid = loaded.targetId;
auto updater = [this, loaded = std::move(loaded)](TargetInfo &ti) {
ti.locationInitLoaded = true;
ti.persistedNodeId = loaded.nodeId;
ti.persistedDiskIndex = loaded.diskIndex;
LOG_OP_DBG(*this,
"TargetInfo of {} loaded, nodeId={}, diskIndex={}",
loaded.targetId,
loaded.nodeId,
OptionalFmt(loaded.diskIndex));
if (!ti.base().nodeId) {
ti.base().nodeId = loaded.nodeId;
ti.base().diskIndex = loaded.diskIndex;
LOG_OP_DBG(*this, "Fill TargetInfo of {}", loaded.targetId);
}
};
dataPtr->routingInfo.updateTarget(tid, std::move(updater));
}
}
if ((co_await ensureSelfIsPrimary(state)).hasError()) {
co_return Void{};
}
{
auto dataPtr = co_await state.data_.coLock();
if (loadedLeaseStart_ < dataPtr->leaseStartTs) {
co_return Void{};
}
for (const auto &[tid, _] : dataPtr->routingInfo.getTargets()) {
auto updater = [this](TargetInfo &ti) {
if (!ti.locationInitLoaded) {
ti.locationInitLoaded = true;
LOG_OP_DBG(*this, "TargetInfo of {} not found in kv", ti.base().targetId);
}
};
dataPtr->routingInfo.updateTarget(tid, std::move(updater));
}
}
co_return Void{};
}
SteadyTime &loadedLeaseStart_;
};
} // namespace
MgmtdTargetInfoLoader::MgmtdTargetInfoLoader(MgmtdState &state)
: state_(state) {}
CoTask<void> MgmtdTargetInfoLoader::run() {
Op op(loadedLeaseStart);
auto handler = [&]() -> CoTryTask<void> { CO_INVOKE_OP_INFO(op, "background", state_); };
auto res = co_await doAsPrimary(state_, std::move(handler));
if (res.hasError()) {
if (res.error().code() == MgmtdCode::kNotPrimary)
LOG_OP_INFO(op, "self is not primary, skip");
else
LOG_OP_ERR(op, "failed: {}", res.error());
}
}
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,18 @@
#pragma once
#include "common/utils/Coroutine.h"
#include "common/utils/UtcTime.h"
namespace hf3fs::mgmtd {
struct MgmtdState;
class MgmtdTargetInfoLoader {
public:
explicit MgmtdTargetInfoLoader(MgmtdState &state);
CoTask<void> run();
private:
SteadyTime loadedLeaseStart;
MgmtdState &state_;
};
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,82 @@
#include "MgmtdTargetInfoPersister.h"
#include "common/utils/OptionalUtils.h"
#include "core/utils/ServiceOperation.h"
#include "core/utils/runOp.h"
#include "mgmtd/service/MgmtdState.h"
#include "mgmtd/service/helpers.h"
namespace hf3fs::mgmtd {
namespace {
#define OP_NAME "PersistTargetInfo"
struct Op : core::ServiceOperationWithMetric<"MgmtdService", OP_NAME, "bg"> {
String toStringImpl() const final { return OP_NAME; }
auto handle(MgmtdState &state) -> CoTryTask<void> {
std::vector<flat::TargetInfo> candidates;
{
auto dataPtr = co_await state.data_.coSharedLock();
const auto &ri = dataPtr->routingInfo;
for (const auto &[tid, ti] : ri.getTargets()) {
if (ti.locationInitLoaded // the persisted location is known
&& ti.base().nodeId // the actual location is known
&& (ti.persistedNodeId != ti.base().nodeId ||
ti.persistedDiskIndex != ti.base().diskIndex) // and they are different
) {
candidates.push_back(ti.base());
if (static_cast<int>(candidates.size()) >= state.config_.target_info_persist_batch()) break;
}
}
}
if (candidates.empty()) {
co_return Void{};
}
auto handler = [&](kv::IReadWriteTransaction &txn) -> CoTryTask<void> {
for (const auto &ti : candidates) {
CO_RETURN_ON_ERROR(co_await state.store_.storeTargetInfo(txn, ti));
}
co_return Void{};
};
auto res = co_await withReadWriteTxn(state, std::move(handler));
CO_RETURN_ON_ERROR(res);
{
auto dataPtr = co_await state.data_.coLock();
for (const auto &persisted : candidates) {
dataPtr->routingInfo.updateTarget(persisted.targetId, [&](auto &ti) {
ti.persistedNodeId = persisted.nodeId;
ti.persistedDiskIndex = persisted.diskIndex;
LOG_OP_DBG(*this,
"TargetInfo of {} persisted, nodeId={}, diskIndex={}",
persisted.targetId,
persisted.nodeId,
OptionalFmt(persisted.diskIndex));
});
}
}
co_return Void{};
}
};
} // namespace
MgmtdTargetInfoPersister::MgmtdTargetInfoPersister(MgmtdState &state)
: state_(state) {}
CoTask<void> MgmtdTargetInfoPersister::run() {
Op op;
auto handler = [&]() -> CoTryTask<void> { CO_INVOKE_OP_INFO(op, "background", state_); };
auto res = co_await doAsPrimary(state_, std::move(handler));
if (res.hasError()) {
if (res.error().code() == MgmtdCode::kNotPrimary)
LOG_OP_INFO(op, "self is not primary, skip");
else
LOG_OP_ERR(op, "failed: {}", res.error());
}
}
} // namespace hf3fs::mgmtd

View File

@@ -0,0 +1,16 @@
#pragma once
#include "common/utils/Coroutine.h"
namespace hf3fs::mgmtd {
struct MgmtdState;
class MgmtdTargetInfoPersister {
public:
explicit MgmtdTargetInfoPersister(MgmtdState &state);
CoTask<void> run();
private:
MgmtdState &state_;
};
} // namespace hf3fs::mgmtd