Initial commit

This commit is contained in:
dev
2025-02-27 21:53:53 +08:00
commit 815e55e4c0
1291 changed files with 185445 additions and 0 deletions

8
src/fuse/CMakeLists.txt Normal file
View File

@@ -0,0 +1,8 @@
# Add /usr/local/lib/x86_64-linux-gnu/ to the linker search path
# (presumably where a locally built libfuse3 is installed; confirm).
link_directories(/usr/local/lib/x86_64-linux-gnu/)
# Declare the hf3fs_fuse library. target_add_lib is a project-defined helper that is not
# shown here; the trailing names look like the libraries it links against (confirm).
target_add_lib(hf3fs_fuse common core-app meta-client storage-client fuse3 client-lib-common)
# Declare the hf3fs_fuse_main executable from hf3fs_fuse.cpp, using hf3fs_fuse
# (target_add_bin is also a project helper; confirm its argument order).
target_add_bin(hf3fs_fuse_main hf3fs_fuse.cpp hf3fs_fuse)
# Export the ENABLE_FUSE_APPLICATION definition so that the sources guarded by
# "#ifdef ENABLE_FUSE_APPLICATION" (FuseApplication.h/.cc, part of FuseConfig.h) are compiled in.
if (ENABLE_FUSE_APPLICATION)
target_compile_definitions(hf3fs_fuse PUBLIC -DENABLE_FUSE_APPLICATION)
endif()

10
src/fuse/FuseAppConfig.cc Normal file
View File

@@ -0,0 +1,10 @@
#include "FuseAppConfig.h"
#include "common/app/ApplicationBase.h"
namespace hf3fs::fuse {
// Initializes this config through ApplicationBase::initConfig using the file at `filePath`
// and the given `updates`; `dump` is forwarded unchanged.
// A failed initialization is reported with a FATAL log entry.
void FuseAppConfig::init(const String &filePath, bool dump, const std::vector<config::KeyValue> &updates) {
  auto initResult = ApplicationBase::initConfig(*this, filePath, dump, updates);
  const bool initFailed = !initResult;
  XLOGF_IF(FATAL,
           initFailed,
           "Init app config failed: {}. filePath: {}. dump: {}",
           initResult.error(),
           filePath,
           dump);
}
} // namespace hf3fs::fuse

16
src/fuse/FuseAppConfig.h Normal file
View File

@@ -0,0 +1,16 @@
#pragma once
#include "common/app/NodeId.h"
#include "common/net/ib/IBDevice.h"
#include "common/utils/ConfigBase.h"
namespace hf3fs::fuse {
// Application-level config type of the FUSE client, built on ConfigBase.
struct FuseAppConfig : public ConfigBase<FuseAppConfig> {
public:
using Base = ConfigBase<FuseAppConfig>;
// Keep the inherited init overloads visible next to the overload declared below.
using Base::init;
// Loads the config from `filePath` with `updates` applied (defined in FuseAppConfig.cc).
void init(const String &filePath, bool dump, const std::vector<config::KeyValue> &updates);
// Always returns node id 0; this config stores no per-node id.
flat::NodeId getNodeId() const { return flat::NodeId(0); }
};
} // namespace hf3fs::fuse

125
src/fuse/FuseApplication.cc Normal file
View File

@@ -0,0 +1,125 @@
#ifdef ENABLE_FUSE_APPLICATION
#include "FuseApplication.h"
#include "FuseMainLoop.h"
#include "FuseOps.h"
#include "common/app/Thread.h"
#include "common/app/Utils.h"
DECLARE_string(cfg);
DECLARE_bool(dump_default_cfg);
DECLARE_bool(use_local_cfg);
namespace hf3fs::fuse {
// Private implementation state of FuseApplication; the header only forward-declares Impl.
struct FuseApplication::Impl {
Result<Void> parseFlags(int *argc, char ***argv);
Result<Void> initApplication();
Result<Void> initFuseClients();
void stop();
int mainLoop();
// Full FUSE configuration (Config is an alias of FuseConfig in FuseApplication.h).
Config hf3fsConfig;
flat::AppInfo appInfo;
// Used only during start-up; initApplication() resets it after the clients are initialized.
std::unique_ptr<Launcher> launcher_ = std::make_unique<Launcher>();
// Guards returned by the log / memory config-update callback factories in initApplication().
std::unique_ptr<ConfigCallbackGuard> onLogConfigUpdated_;
std::unique_ptr<ConfigCallbackGuard> onMemConfigUpdated_;
// Filled by ApplicationBase::parseFlags from arguments with the "--config." prefix.
ConfigFlags configFlags_;
// (*argv)[0] after flag parsing; forwarded to fuseMainLoop().
String programName;
// The values below are copied in initFuseClients() and later passed to fuseMainLoop(),
// which runs after launcher_ has been released.
bool allowOther = false;
String configMountpoint;
size_t configMaxBufSize = 0;
String configClusterId;
};
// Creates the application with a freshly allocated Impl.
FuseApplication::FuseApplication()
: impl_(std::make_unique<Impl>()) {}
FuseApplication::~FuseApplication() = default;
// Parses the command line in two passes: first the launcher's own flags, then the dynamic
// config overrides (arguments starting with "--config."), which are collected in configFlags_.
// Afterwards the first remaining argument is stored as the program name.
// Returns the first error reported by either pass.
Result<Void> FuseApplication::Impl::parseFlags(int *argc, char ***argv) {
  static constexpr std::string_view kDynamicConfigPrefix = "--config.";

  RETURN_ON_ERROR(launcher_->parseFlags(argc, argv));
  RETURN_ON_ERROR(ApplicationBase::parseFlags(kDynamicConfigPrefix, argc, argv, configFlags_));

  char **remainingArgs = *argv;
  programName = remainingArgs[0];
  return Void{};
}
Result<Void> FuseApplication::parseFlags(int *argc, char ***argv) { return impl_->parseFlags(argc, argv); }
// Brings the application up: launcher init, app info and config loading, common components,
// config-update callbacks, config persistence, and finally the FUSE clients.
// Failures of the launcher or of the client initialization are logged at FATAL level;
// the function itself only ever returns Void{}.
Result<Void> FuseApplication::Impl::initApplication() {
// --dump_default_cfg: print the default-constructed config and exit without starting anything.
if (FLAGS_dump_default_cfg) {
fmt::print("{}\n", hf3fsConfig.toString());
exit(0);
}
auto firstInitRes = launcher_->init();
XLOGF_IF(FATAL, !firstInitRes, "Failed to init launcher: {}", firstInitRes.error());
// Both helpers receive a callback that asks the launcher for the data.
app_detail::loadAppInfo([this] { return launcher_->loadAppInfo(); }, appInfo);
app_detail::initConfig(hf3fsConfig, configFlags_, appInfo, [this] { return launcher_->loadConfigTemplate(); });
XLOGF(INFO, "Server config inited");
app_detail::initCommonComponents(hf3fsConfig.common(), kName, appInfo.nodeId);
// Keep the returned guards alive in Impl for the lifetime of the application.
onLogConfigUpdated_ = app_detail::makeLogConfigUpdateCallback(hf3fsConfig.common().log(), kName);
onMemConfigUpdated_ = app_detail::makeMemConfigUpdateCallback(hf3fsConfig.common().memory(), appInfo.hostname);
XLOGF(INFO, "Full Config:\n{}", hf3fsConfig.toString());
app_detail::persistConfig(hf3fsConfig);
XLOGF(INFO, "Start to init fuse clients");
auto initRes = initFuseClients();
XLOGF_IF(FATAL, !initRes, "Init fuse clients failed: {}", initRes.error());
XLOGF(INFO, "Init fuse clients finished");
// The launcher is not needed any more; initFuseClients() has copied what mainLoop() requires.
launcher_.reset();
return Void{};
}
// Caches the launcher/FUSE settings that mainLoop() needs later (the launcher is released
// once initialization finishes) and then initializes the global FuseClients instance.
// Returns the error from FuseClients::init, if any.
Result<Void> FuseApplication::Impl::initFuseClients() {
  const auto &launcherCfg = launcher_->launcherConfig();

  configClusterId = launcherCfg.cluster_id();
  configMountpoint = launcherCfg.mountpoint();
  allowOther = launcherCfg.allow_other();
  configMaxBufSize = hf3fsConfig.io_bufs().max_buf_size();

  auto &clients = getFuseClientsInstance();
  RETURN_ON_ERROR(clients.init(appInfo, launcherCfg.mountpoint(), launcherCfg.token_file(), hf3fsConfig));
  return Void{};
}
Result<Void> FuseApplication::initApplication() { return impl_->initApplication(); }
// Stops the global FuseClients instance and then calls hf3fs::stopAndJoin(nullptr)
// (a project helper not shown here).
void FuseApplication::Impl::stop() {
getFuseClientsInstance().stop();
hf3fs::stopAndJoin(nullptr);
}
void FuseApplication::stop() { impl_->stop(); }
config::IConfig *FuseApplication::getConfig() { return &impl_->hf3fsConfig; }
const flat::AppInfo *FuseApplication::info() const { return &impl_->appInfo; }
bool FuseApplication::configPushable() const { return FLAGS_cfg.empty() && !FLAGS_use_local_cfg; }
void FuseApplication::onConfigUpdated() { app_detail::persistConfig(impl_->hf3fsConfig); }
// Runs the FUSE session loop with the settings cached by initFuseClients() and returns
// the exit code of fuseMainLoop().
int FuseApplication::Impl::mainLoop() {
// Interrupt signals are unblocked on this thread before entering the FUSE loop
// (presumably so the FUSE signal handlers installed in fuseMainLoop can run; confirm).
Thread::unblockInterruptSignals();
return fuseMainLoop(programName, allowOther, configMountpoint, configMaxBufSize, configClusterId);
}
int FuseApplication::mainLoop() { return impl_->mainLoop(); }
} // namespace hf3fs::fuse
#endif

View File

@@ -0,0 +1,55 @@
#pragma once
#ifdef ENABLE_FUSE_APPLICATION
#include "FuseAppConfig.h"
#include "FuseConfig.h"
#include "FuseConfigFetcher.h"
#include "FuseLauncherConfig.h"
#include "common/app/ApplicationBase.h"
#include "core/app/ServerLauncher.h"
namespace hf3fs::fuse {
// FUSE client application built on ApplicationBase. All state lives in a private Impl
// (defined in FuseApplication.cc); the overrides below forward to it.
class FuseApplication : public ApplicationBase {
public:
// Name and node type passed to the common application helpers.
static constexpr auto kName = "Fuse";
static constexpr auto kNodeType = flat::NodeType::FUSE;
// Type aliases consumed by core::ServerLauncher<FuseApplication>.
using AppConfig = FuseAppConfig;
using LauncherConfig = FuseLauncherConfig;
using RemoteConfigFetcher = FuseConfigFetcher;
using Launcher = core::ServerLauncher<FuseApplication>;
using Config = FuseConfig;
FuseApplication();
// Declared here and defined out of line because Impl is incomplete in this header.
~FuseApplication();
private:
// ApplicationBase overrides.
Result<Void> parseFlags(int *argc, char ***argv) final;
Result<Void> initApplication() final;
void stop() final;
int mainLoop() final;
config::IConfig *getConfig() final;
const flat::AppInfo *info() const final;
bool configPushable() const final;
void onConfigUpdated() final;
private:
// NOTE(review): no definition of initServer/startServer is visible in FuseApplication.cc;
// these look like unused leftovers -- confirm before relying on them.
Result<Void> initServer();
Result<Void> startServer();
struct Impl;
std::unique_ptr<Impl> impl_;
};
} // namespace hf3fs::fuse
#endif

440
src/fuse/FuseClients.cc Normal file
View File

@@ -0,0 +1,440 @@
#include "FuseClients.h"
#include <folly/Random.h>
#include <folly/ScopeGuard.h>
#include <folly/executors/IOThreadPoolExecutor.h>
#include <folly/experimental/coro/BlockingWait.h>
#include <folly/functional/Partial.h>
#include <folly/logging/xlog.h>
#include <fuse3/fuse_lowlevel.h>
#include <memory>
#include <thread>
#include <utility>
#include "common/app/ApplicationBase.h"
#include "common/monitor/Recorder.h"
#include "common/utils/BackgroundRunner.h"
#include "common/utils/Coroutine.h"
#include "common/utils/Duration.h"
#include "common/utils/FileUtils.h"
#include "common/utils/SysResource.h"
#include "fbs/meta/Common.h"
#include "fbs/mgmtd/Rpc.h"
#include "stubs/MetaService/MetaServiceStub.h"
#include "stubs/common/RealStubFactory.h"
#include "stubs/mgmtd/MgmtdServiceStub.h"
namespace hf3fs::fuse {
namespace {
monitor::ValueRecorder dirtyInodesCnt("fuse.dirty_inodes");
// Blocks until a client session has been established with mgmtd or all attempts are used up.
// Up to 40 attempts are made; after every failed attempt the failure is logged and the
// coroutine sleeps, starting at 10ms and doubling up to a cap of 1000ms.
// Returns the result of the last extendClientSession() call.
Result<Void> establishClientSession(client::IMgmtdClientForClient &mgmtdClient) {
  auto retryLoop = [&]() -> CoTryTask<void> {
    constexpr auto maxBackoff = std::chrono::milliseconds(1000);
    auto backoff = std::chrono::milliseconds(10);
    Result<Void> lastResult = Void{};
    for (int attempt = 0; attempt < 40; ++attempt) {
      lastResult = co_await mgmtdClient.extendClientSession();
      if (lastResult) {
        break;
      }
      XLOGF(CRITICAL,
            "Try to establish client session failed: {}\nretryCount: {}",
            lastResult.error(),
            attempt);
      co_await folly::coro::sleep(backoff);
      backoff = std::min(2 * backoff, maxBackoff);
    }
    co_return lastResult;
  };
  return folly::coro::blockingWait(retryLoop());
}
} // namespace
FuseClients::~FuseClients() { stop(); }
// Initializes every client-side component used by the FUSE ops.
//
// appInfo    - application info; its clusterId is used as the mount name and for mgmtd.
// mountPoint - mount point path (stored lexically normalized).
// tokenFile  - file holding the access token, read unless HF3FS_FUSE_TOKEN is set.
// fuseConfig - FUSE config; a pointer to it is kept in `config`, so it must outlive this object.
//
// Returns an error if the token file cannot be read, the network client fails to start,
// a hostname lookup fails, or the client session cannot be established.
Result<Void> FuseClients::init(const flat::AppInfo &appInfo,
const String &mountPoint,
const String &tokenFile,
FuseConfig &fuseConfig) {
config = &fuseConfig;
// The cluster id doubles as the mount name and must be shorter than 32 characters.
fuseMount = appInfo.clusterId;
XLOGF_IF(FATAL,
fuseMount.size() >= 32,
"FUSE only support mount name shorter than 32 characters, but {} got.",
fuseMount);
fuseMountpoint = Path(mountPoint).lexically_normal();
if (fuseConfig.remount_prefix()) {
fuseRemountPref = Path(*fuseConfig.remount_prefix()).lexically_normal();
}
// The token from the environment takes precedence over the token file.
if (const char *env_p = std::getenv("HF3FS_FUSE_TOKEN")) {
XLOGF(INFO, "Use token from env var");
fuseToken = std::string(env_p);
} else {
XLOGF(INFO, "Use token from config");
auto tokenRes = loadFile(tokenFile);
RETURN_ON_ERROR(tokenRes);
fuseToken = folly::trimWhitespace(*tokenRes);
}
enableWritebackCache = fuseConfig.enable_writeback_cache();
memsetBeforeRead = fuseConfig.memset_before_read();
maxIdleThreads = fuseConfig.max_idle_threads();
// Cap the FUSE thread count at half of the logical cores (rounded up) when the core
// count is known; hardware_concurrency() returns 0 when it is not.
int logicalCores = std::thread::hardware_concurrency();
if (logicalCores != 0) {
maxThreads = std::min(fuseConfig.max_threads(), (logicalCores + 1) / 2);
} else {
maxThreads = fuseConfig.max_threads();
}
bufPool = net::RDMABufPool::create(fuseConfig.io_bufs().max_buf_size(), fuseConfig.rdma_buf_pool_size());
// The iov table is rooted at the remount prefix when one is configured, else at the mount point.
iovs.init(fuseRemountPref.value_or(fuseMountpoint), fuseConfig.iov_limit());
iors.init(fuseConfig.iov_limit());
userConfig.init(fuseConfig);
// Create and start the network client unless one was already set.
if (!client) {
client = std::make_unique<net::Client>(fuseConfig.client());
RETURN_ON_ERROR(client->start());
}
auto ctxCreator = [this](net::Address addr) { return client->serdeCtx(addr); };
// Create the mgmtd client unless one was already set.
if (!mgmtdClient) {
mgmtdClient = std::make_shared<client::MgmtdClientForClient>(
appInfo.clusterId,
std::make_unique<stubs::RealStubFactory<mgmtd::MgmtdServiceStub>>(ctxCreator),
fuseConfig.mgmtd());
}
auto physicalHostnameRes = SysResource::hostname(/*physicalMachineName=*/true);
RETURN_ON_ERROR(physicalHostnameRes);
auto containerHostnameRes = SysResource::hostname(/*physicalMachineName=*/false);
RETURN_ON_ERROR(containerHostnameRes);
// Session payload: random client id derived from the physical hostname, with the
// container hostname in the description.
auto clientId = ClientId::random(*physicalHostnameRes);
mgmtdClient->setClientSessionPayload({clientId.uuid.toHexString(),
flat::NodeType::FUSE,
flat::ClientSessionData::create(
/*universalId=*/*physicalHostnameRes,
/*description=*/fmt::format("fuse: {}", *containerHostnameRes),
appInfo.serviceGroups,
appInfo.releaseVersion),
// TODO: use real user info
flat::UserInfo{}});
mgmtdClient->setConfigListener(ApplicationBase::updateConfig);
// Start mgmtd client, fetch routing info, then establish the session (with retries).
folly::coro::blockingWait(mgmtdClient->start(&client->tpg().bgThreadPool().randomPick()));
folly::coro::blockingWait(mgmtdClient->refreshRoutingInfo(/*force=*/false));
RETURN_ON_ERROR(establishClientSession(*mgmtdClient));
storageClient = storage::client::StorageClient::create(clientId, fuseConfig.storage(), *mgmtdClient);
metaClient =
std::make_shared<meta::client::MetaClient>(clientId,
fuseConfig.meta(),
std::make_unique<meta::client::MetaClient::StubFactory>(ctxCreator),
mgmtdClient,
storageClient,
true /* dynStripe */);
metaClient->start(client->tpg().bgThreadPool());
// Three job queues indexed by priority: 0 = high, 1 = normal, 2 = low.
iojqs.reserve(3);
iojqs.emplace_back(new BoundedQueue<IoRingJob>(fuseConfig.io_jobq_sizes().hi()));
iojqs.emplace_back(new BoundedQueue<IoRingJob>(fuseConfig.io_jobq_size()));
iojqs.emplace_back(new BoundedQueue<IoRingJob>(fuseConfig.io_jobq_sizes().lo()));
jitter = fuseConfig.submit_wait_jitter();
// Spawn batch_io_coros worker coroutines, spread round-robin over the background
// thread pool; they are cancelled through cancelIos in stop().
auto &tp = client->tpg().bgThreadPool();
auto coros = fuseConfig.batch_io_coros();
for (int i = 0; i < coros; ++i) {
auto exec = &tp.get(i % tp.size());
co_withCancellation(cancelIos.getToken(), ioRingWorker(i, coros)).scheduleOn(exec).start();
}
// One watcher thread per priority feeds the matching job queue.
ioWatches.reserve(3);
for (int i = 0; i < 3; ++i) {
ioWatches.emplace_back(folly::partial(&FuseClients::watch, this, i));
}
// Periodic sync: a coroutine pool handling individual inodes plus a background runner
// that calls periodicSyncScan() at the configured interval scaled by a random factor in [0.7, 1.3).
periodicSyncWorker = std::make_unique<CoroutinesPool<InodeId>>(config->periodic_sync().worker());
periodicSyncWorker->start(folly::partial(&FuseClients::periodicSync, this), tp);
periodicSyncRunner = std::make_unique<BackgroundRunner>(&tp.pickNextFree());
periodicSyncRunner->start("PeriodSync", folly::partial(&FuseClients::periodicSyncScan, this), [&]() {
return config->periodic_sync().interval() * folly::Random::randDouble(0.7, 1.3);
});
// Refresh the cached hot-updatable settings whenever the config changes.
onFuseConfigUpdated = fuseConfig.addCallbackGuard([&fuseConfig = fuseConfig, this] {
memsetBeforeRead = fuseConfig.memset_before_read();
jitter = std::chrono::duration_cast<std::chrono::nanoseconds>(fuseConfig.submit_wait_jitter());
});
notifyInvalExec =
std::make_unique<folly::IOThreadPoolExecutor>(fuseConfig.notify_inval_threads(),
std::make_shared<folly::NamedThreadFactory>("NotifyInvalThread"));
return Void{};
}
// Stops the components created by init(). Every owned component is checked for null and
// reset after stopping, so calling stop() again (e.g. from the destructor) skips it.
void FuseClients::stop() {
if (notifyInvalExec) {
notifyInvalExec->stop();
notifyInvalExec.reset();
}
// Drop the config-update callback before the clients it refers to go away.
if (onFuseConfigUpdated) {
onFuseConfigUpdated.reset();
}
// Cancel the io worker coroutines and ask the watcher threads to stop.
// The watcher jthreads are only signalled here, not joined.
cancelIos.requestCancellation();
for (auto &t : ioWatches) {
t.request_stop();
}
if (periodicSyncRunner) {
folly::coro::blockingWait(periodicSyncRunner->stopAll());
periodicSyncRunner.reset();
}
if (periodicSyncWorker) {
periodicSyncWorker->stopAndJoin();
periodicSyncWorker.reset();
}
// Clients are stopped in this order: meta, storage, mgmtd, then the network client.
if (metaClient) {
metaClient->stop();
metaClient.reset();
}
if (storageClient) {
storageClient->stop();
storageClient.reset();
}
if (mgmtdClient) {
folly::coro::blockingWait(mgmtdClient->stop());
mgmtdClient.reset();
}
if (client) {
client->stopAndJoin();
client.reset();
}
}
// Worker coroutine that pulls IoRingJobs from the priority job queues and processes them.
//
// i   - index of this worker among all workers.
// ths - total number of workers started by init().
//
// The worker's own priority is derived from its index: the first io_worker_coros.hi workers
// serve priority 0, the last io_worker_coros.lo workers serve priority 2, all others serve 1.
// With enable_priority off the worker only dequeues from its own queue; otherwise it may also
// take jobs from higher-priority queues as described by the comments below.
//
// The loop ends when the coroutine is cancelled (OperationCancelled). Any other exception is
// treated as fatal and logged together with the exception text.
CoTask<void> FuseClients::ioRingWorker(int i, int ths) {
  // a worker thread has its own priority, but it can also execute jobs from queues with a higher priority
  // checkHigher is used to make sure the job queue with the thread's own priority doesn't starve
  bool checkHigher = true;

  while (true) {
    auto res = co_await folly::coro::co_awaitTry([this, &checkHigher, i, ths]() -> CoTask<void> {
      IoRingJob job;
      auto hiThs = config->io_worker_coros().hi(), loThs = config->io_worker_coros().lo();
      auto prio = i < hiThs ? 0 : i < (ths - loThs) ? 1 : 2;
      if (!config->enable_priority()) {
        job = co_await iojqs[prio]->co_dequeue();
      } else {
        bool gotJob = false;

        // if checkHigher, dequeue from a higher job queue if it is full
        while (!gotJob) {
          if (checkHigher) {
            for (int nprio = 0; nprio < prio; ++nprio) {
              if (iojqs[nprio]->full()) {
                auto dres = iojqs[nprio]->try_dequeue();
                if (dres) {
                  // got a job from higher priority queue, next time pick a same priority job unless the queue is empty
                  checkHigher = false;
                  gotJob = true;
                  job = std::move(*dres);
                  break;
                }
              }
            }

            if (gotJob) {
              break;
            }
          }

          // if checkHigher, check from higher prio to lower; otherwise, reverse the checking direction
          for (int nprio = checkHigher ? 0 : prio; checkHigher ? nprio <= prio : nprio >= 0;
               nprio += checkHigher ? 1 : -1) {
            // wait on the queue for at most io_job_deq_timeout before moving to the next one
            auto [sres, dres] =
                co_await folly::coro::collectAnyNoDiscard(folly::coro::sleep(config->io_job_deq_timeout()),
                                                          iojqs[nprio]->co_dequeue());
            if (dres.hasValue()) {
              // if the job is the thread's own priority, next time it can check from higher priority queues
              if (!checkHigher && nprio == prio) {
                checkHigher = true;
              }
              gotJob = true;
              job = std::move(*dres);
              break;
            } else if (sres.hasValue()) {
              continue;
            } else {
              dres.throwUnlessValue();
            }
          }
        }
      }

      while (true) {
        // Resolves the inode of every sqe under inodesMutex; unknown inodes yield a null pointer.
        // Consecutive sqes with the same inode id reuse the previous lookup.
        auto lookupFiles =
            [this](std::vector<std::shared_ptr<RcInode>> &ins, const IoArgs *args, const IoSqe *sqes, int sqec) {
              auto lastIid = 0ull;

              std::lock_guard lock(inodesMutex);
              for (int i = 0; i < sqec; ++i) {
                auto idn = args[sqes[i].index].fileIid;
                if (i && idn == lastIid) {
                  ins.emplace_back(ins.back());
                  continue;
                }

                lastIid = idn;
                auto iid = meta::InodeId(idn);
                auto it = inodes.find(iid);
                ins.push_back(it == inodes.end() ? (std::shared_ptr<RcInode>()) : it->second);
              }
            };
        // Resolves the shared-memory buffer of every sqe under iovs.shmLock; an unknown buffer
        // id or an out-of-range offset/length yields a kInvalidArg error for that sqe.
        auto lookupBufs =
            [this](std::vector<Result<lib::ShmBufForIO>> &bufs, const IoArgs *args, const IoSqe *sqe, int sqec) {
              auto lastId = Uuid::zero();
              std::shared_ptr<lib::ShmBuf> lastShm;

              std::lock_guard lock(iovs.shmLock);
              for (int i = 0; i < sqec; ++i) {
                auto &arg = args[sqe[i].index];
                Uuid id;
                memcpy(id.data, arg.bufId, sizeof(id.data));

                std::shared_ptr<lib::ShmBuf> shm;
                if (i && id == lastId) {
                  shm = lastShm;
                } else {
                  auto it = iovs.shmsById.find(id);
                  if (it == iovs.shmsById.end()) {
                    bufs.emplace_back(makeError(StatusCode::kInvalidArg, "buf id not found"));
                    continue;
                  }

                  auto iovd = it->second;
                  shm = iovs.iovs->table[iovd].load();
                  if (!shm) {
                    bufs.emplace_back(makeError(StatusCode::kInvalidArg, "buf id not found"));
                    continue;
                  } else if (shm->size < arg.bufOff + arg.ioLen) {
                    bufs.emplace_back(makeError(StatusCode::kInvalidArg, "invalid buf off and/or io len"));
                    continue;
                  }

                  lastId = id;
                  lastShm = shm;
                }

                bufs.emplace_back(lib::ShmBufForIO(std::move(shm), arg.bufOff));
              }
            };

        co_await job.ior->process(job.sqeProcTail,
                                  job.toProc,
                                  *storageClient,
                                  config->storage_io(),
                                  userConfig,
                                  std::move(lookupFiles),
                                  std::move(lookupBufs));

        if (iojqs[0]->full() || job.ior->priority != prio) {
          sem_post(iors.sems[job.ior->priority].get());  // wake the watchers
        } else {
          // try to take the next job of the same ring; if it cannot be handed to queue 0,
          // keep processing it in this worker
          auto jobs = job.ior->jobsToProc(1);
          if (!jobs.empty()) {
            job = jobs.front();
            if (!iojqs[0]->try_enqueue(job)) {
              continue;
            }
          }
        }

        break;
      }
    }());
    if (UNLIKELY(res.hasException())) {
      if (res.hasException<OperationCancelled>()) {
        // only a real cancellation is reported as such
        XLOGF(INFO, "io worker #{} cancelled", i);
        break;
      } else {
        XLOGF(FATAL, "got exception in io worker #{}: {}", i, res.exception().what().toStdString());
      }
    }
  }
}
// Watcher thread body for one priority level. It waits on the semaphore of that priority
// (with a timeout of `jitter` from now) and, once woken, repeatedly scans all io rings of
// the same priority and moves their pending jobs into the matching job queue until a full
// scan finds nothing. Runs until `stop` is requested.
void FuseClients::watch(int prio, std::stop_token stop) {
  constexpr int kNanosPerSecond = 1000000000;

  while (!stop.stop_requested()) {
    struct timespec deadline;
    if (clock_gettime(CLOCK_REALTIME, &deadline) < 0) {
      continue;
    }

    // absolute deadline = now + jitter, normalized so that tv_nsec stays below one second
    auto totalNanos = deadline.tv_nsec + jitter.load().count();
    deadline.tv_nsec = totalNanos % kNanosPerSecond;
    deadline.tv_sec += totalNanos / kNanosPerSecond;

    const bool timedOut = sem_timedwait(iors.sems[prio].get(), &deadline) < 0 && errno == ETIMEDOUT;
    if (timedOut) {
      continue;
    }

    // keep scanning till a pass finds no more jobs, then block again in the next iteration
    for (bool foundJobs = true; foundJobs;) {
      foundJobs = false;
      auto ringCount = iors.ioRings->slots.nextAvail.load();
      for (int idx = 0; idx < ringCount; ++idx) {
        auto ring = iors.ioRings->table[idx].load();
        if (!ring || ring->priority != prio) {
          continue;
        }
        auto pending = ring->jobsToProc(config->max_jobs_per_ioring());
        for (auto &&job : pending) {
          foundJobs = true;
          iojqs[prio]->enqueue(std::move(job));
        }
      }
    }
  }
}
// One round of the periodic sync: takes dirty inodes out of `dirtyInodes` and hands them to
// the periodic sync worker pool. Does nothing when periodic sync is disabled or the mount is
// read-only.
//
// When the number of dirty inodes exceeds periodic_sync.limit, only `limit` inodes are taken
// per round. The selection continues after the inode taken last in the previous round
// (`lastSynced`) and wraps around to the beginning of the set, so that all inodes get their
// turn even while the set stays above the limit.
CoTask<void> FuseClients::periodicSyncScan() {
  if (!config->periodic_sync().enable() || config->readonly()) {
    co_return;
  }

  XLOGF(INFO, "periodicSyncScan run");

  std::set<InodeId> dirty;
  {
    auto guard = dirtyInodes.lock();
    auto limit = config->periodic_sync().limit();
    dirtyInodesCnt.set(guard->size());
    if (guard->size() <= limit) {
      // small enough: take everything at once
      dirty = std::exchange(*guard, {});
    } else {
      XLOGF(WARN, "dirty inodes {} > limit {}", guard->size(), limit);
      // lastSynced was erased from the set when it was taken, so find() would normally miss
      // and restart at begin() every round; upper_bound resumes right after it instead.
      auto iter = guard->upper_bound(lastSynced.load());
      while (dirty.size() < limit) {
        if (iter == guard->end()) {
          // wrap around; the set holds more than `limit` entries, so it cannot be empty here
          iter = guard->begin();
          XLOGF_IF(FATAL, iter == guard->end(), "iter == guard->end() shouldn't happen");
        } else {
          auto inode = *iter;
          lastSynced = inode;
          iter = guard->erase(iter);
          dirty.insert(inode);
        }
      }
    }
  }

  // enqueue outside of the lock
  for (auto inode : dirty) {
    co_await periodicSyncWorker->enqueue(inode);
  }

  co_return;
}
} // namespace hf3fs::fuse

243
src/fuse/FuseClients.h Normal file
View File

@@ -0,0 +1,243 @@
#pragma once
#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <folly/MPMCQueue.h>
#include <folly/Math.h>
#include <folly/Synchronized.h>
#include <folly/Utility.h>
#include <folly/executors/IOThreadPoolExecutor.h>
#include <folly/experimental/coro/Mutex.h>
#include <folly/fibers/Semaphore.h>
#include <folly/logging/xlog.h>
#include <memory>
#include <mutex>
#include <optional>
#include <string>
#include <sys/types.h>
#include <thread>
#include <utility>
#include "common/utils/BackgroundRunner.h"
#include "common/utils/CoroutinesPool.h"
#include "common/utils/Result.h"
#include "common/utils/Semaphore.h"
#include "common/utils/UtcTime.h"
#include "fbs/core/user/User.h"
#include "fbs/meta/Common.h"
#define FUSE_USE_VERSION 312
#define OP_LOG_LEVEL DBG
#include <folly/concurrency/AtomicSharedPtr.h>
#include <fuse3/fuse_lowlevel.h>
#include "FuseConfig.h"
#include "IoRing.h"
#include "IovTable.h"
#include "PioV.h"
#include "UserConfig.h"
#include "client/meta/MetaClient.h"
#include "client/mgmtd/MgmtdClientForClient.h"
#include "client/storage/StorageClient.h"
#include "fbs/meta/Schema.h"
namespace hf3fs::fuse {
using flat::Gid;
using flat::Uid;
using flat::UserInfo;
using lib::agent::PioV;
using meta::Acl;
using meta::Directory;
using meta::DirEntry;
using meta::Inode;
using meta::InodeData;
using meta::InodeId;
using meta::Permission;
using storage::client::IOBuffer;
// Per-inode write buffer state.
struct InodeWriteBuf {
// Buffered bytes.
std::vector<uint8_t> buf;
// IO buffer handle from the storage client (presumably the registration of `buf`; confirm in FuseOps.cc).
std::unique_ptr<storage::client::IOBuffer> memh;
// Offset and length of the buffered range; both start at 0.
off_t off{0};
size_t len{0};
};
// Reference-counted in-memory inode kept by the FUSE client, together with locally tracked
// write/sync state for files.
struct RcInode {
  // Locally maintained attributes of a file inode; guarded by `dynamicAttr`.
  struct DynamicAttr {
    uint64_t written = 0;
    uint64_t synced = 0;                              // period sync
    uint64_t fsynced = 0;                             // fsync, close, truncate, etc...
    flat::Uid writer = flat::Uid(0);

    uint32_t dynStripe = 1;  // dynamic stripe

    uint64_t truncateVer = 0;                         // largest known truncate version.
    std::optional<meta::VersionedLength> hintLength;  // local hint length
    std::optional<UtcTime> atime;                     // local read time, but only update for write open
    std::optional<UtcTime> mtime;                     // local write time

    // Merges the state of a freshly fetched `inode` into the local attributes.
    // syncver - sync version the inode corresponds to (0 when unknown).
    // fsync   - whether that sync was an fsync-like operation; then `fsynced` advances too.
    // Non-file inodes are ignored.
    void update(const Inode &inode, uint64_t syncver = 0, bool fsync = false) {
      if (!inode.isFile()) {
        return;
      }

      synced = std::max(synced, syncver);
      if (written == synced) {
        // clear local hint, since not write happens after sync
        hintLength = meta::VersionedLength{0, 0};
      }
      if (fsync) {
        fsynced = std::max(fsynced, syncver);
      }
      truncateVer = std::max(truncateVer, inode.asFile().truncateVer);
      dynStripe = inode.asFile().dynStripe;
    }
  };

  Inode inode;
  int refcount;
  // Explicitly zero-initialized: the constructor below does not set it, and relying on the
  // default constructor of std::atomic only yields 0 with C++20 library semantics.
  std::atomic<int> opened{0};

  std::mutex wbMtx;
  std::shared_ptr<InodeWriteBuf> writeBuf;

  folly::Synchronized<DynamicAttr> dynamicAttr;
  folly::coro::Mutex extendStripeLock;

  // Wraps `inode` with the given initial reference count. For files, the dynamic attributes
  // are seeded from the inode's truncate version and dynamic stripe.
  RcInode(Inode inode, int refcount = 1)
      : inode(inode),
        refcount(refcount),
        extendStripeLock() {
    if (inode.isFile()) {
      auto guard = dynamicAttr.wlock();
      guard->truncateVer = inode.asFile().truncateVer;
      guard->hintLength = meta::VersionedLength{0, guard->truncateVer};
      guard->dynStripe = inode.asFile().dynStripe;
    }
  }

  // Returns the largest truncate version known locally.
  uint64_t getTruncateVer() const { return dynamicAttr.rlock()->truncateVer; }

  // Thread-safe wrapper around DynamicAttr::update; non-file inodes are ignored.
  void update(const Inode &inode, uint64_t syncver = 0, bool fsync = false) {
    if (!inode.isFile()) {
      return;
    } else {
      auto guard = dynamicAttr.wlock();
      return guard->update(inode, syncver, fsync);
    }
  }

  // clear hint length, force calculate length on next sync
  void clearHintLength() {
    auto guard = dynamicAttr.wlock();
    guard->hintLength = std::nullopt;
  }

  // Declared here, defined elsewhere (not visible in this header).
  CoTryTask<uint64_t> beginWrite(flat::UserInfo userInfo,
                                 meta::client::MetaClient &meta,
                                 uint64_t offset,
                                 uint64_t length);

  void finishWrite(flat::UserInfo userInfo, uint64_t truncateVer, uint64_t offset, ssize_t ret);
};
// State kept for an open file: the shared inode, an O_DIRECT flag (by its name; confirm
// where it is set) and a session id.
struct FileHandle {
std::shared_ptr<RcInode> rcinode;
bool oDirect;
Uuid sessionId;
// Disabled constructor kept from the original source.
/* FileHandle(std::shared_ptr<RcInode> rcinode, bool oDirect, Uuid sessionId) */
/* : rcinode(rcinode), */
/* sessionId(sessionId) {} */
};
// State kept for an open directory.
struct DirHandle {
// Directory handle id (presumably allocated from FuseClients::dirHandle; confirm in FuseOps.cc).
size_t dirId;
// Process id associated with the handle (presumably the opener; confirm).
pid_t pid;
// NOTE(review): looks like a marker for directories served by the iov table; confirm.
bool iovDir;
};
// Holder of a shared list of directory entries.
struct DirEntryVector {
std::shared_ptr<std::vector<DirEntry>> dirEntries;
// Takes over the passed shared pointer (rvalue only).
DirEntryVector(std::shared_ptr<std::vector<DirEntry>> &&dirEntries)
: dirEntries(std::move(dirEntries)) {}
};
// Holder of a shared list of directory entries plus a list of optional inodes
// (presumably parallel to the entries, as used for readdirplus results; confirm).
struct DirEntryInodeVector {
std::shared_ptr<std::vector<DirEntry>> dirEntries;
std::shared_ptr<std::vector<std::optional<Inode>>> inodes;
// Stores both shared pointers; the by-value parameters are moved into the members.
DirEntryInodeVector(std::shared_ptr<std::vector<DirEntry>> dirEntries,
std::shared_ptr<std::vector<std::optional<Inode>>> inodes)
: dirEntries(std::move(dirEntries)),
inodes(std::move(inodes)) {}
};
// All client-side state shared by the FUSE operations: network/meta/storage/mgmtd clients,
// the inode table, io rings and their job queues, and the periodic sync machinery.
// init() sets everything up; stop() (also run by the destructor) tears it down.
struct FuseClients {
  FuseClients() = default;
  ~FuseClients();

  Result<Void> init(const flat::AppInfo &appInfo,
                    const String &mountPoint,
                    const String &tokenFile,
                    FuseConfig &fuseConfig);
  void stop();

  // Worker coroutine #i out of `ths` that processes io ring jobs.
  CoTask<void> ioRingWorker(int i, int ths);
  // Watcher thread body feeding the job queue of priority `prio`.
  void watch(int prio, std::stop_token stop);

  // Periodic sync: scan of the dirty set, and sync of a single inode.
  CoTask<void> periodicSyncScan();
  CoTask<void> periodicSync(InodeId inodeId);

  std::unique_ptr<net::Client> client;
  std::shared_ptr<client::MgmtdClientForClient> mgmtdClient;
  std::shared_ptr<storage::client::StorageClient> storageClient;
  std::shared_ptr<meta::client::MetaClient> metaClient;

  std::string fuseToken;
  std::string fuseMount;
  Path fuseMountpoint;
  std::optional<Path> fuseRemountPref;
  // Cached config value; refreshed by the config-update callback registered in init().
  std::atomic<bool> memsetBeforeRead = false;
  int maxIdleThreads = 0;
  int maxThreads = 0;
  bool enableWritebackCache = false;

  std::unique_ptr<ConfigCallbackGuard> onFuseConfigUpdated;

  // Inode table, guarded by inodesMutex; the root inode is present from the start with refcount 2.
  std::unordered_map<InodeId, std::shared_ptr<RcInode>> inodes = {
      {InodeId::root(), std::make_shared<RcInode>(Inode{}, 2)}};
  std::mutex inodesMutex;

  std::unordered_map<uint64_t, DirEntryInodeVector> readdirplusResults;
  std::mutex readdirplusResultsMutex;

  std::atomic_uint64_t dirHandle{0};

  std::shared_ptr<net::RDMABufPool> bufPool;
  int maxBufsize = 0;

  fuse_session *se = nullptr;

  // Timeout used by the watcher threads when waiting on their semaphore; explicitly
  // zero until init() loads the configured value.
  std::atomic<std::chrono::nanoseconds> jitter{std::chrono::nanoseconds::zero()};

  IovTable iovs;
  IoRingTable iors;
  std::vector<std::unique_ptr<BoundedQueue<IoRingJob>>> iojqs;  // job queues
  std::vector<std::jthread> ioWatches;
  folly::CancellationSource cancelIos;

  UserConfig userConfig;

  // Inodes with unsynced changes, and the inode taken last by periodicSyncScan().
  folly::Synchronized<std::set<InodeId>, std::mutex> dirtyInodes;
  std::atomic<InodeId> lastSynced;
  std::unique_ptr<BackgroundRunner> periodicSyncRunner;
  std::unique_ptr<CoroutinesPool<InodeId>> periodicSyncWorker;

  std::unique_ptr<folly::IOThreadPoolExecutor> notifyInvalExec;
  // Set by init(); null until then, like `se`.
  const FuseConfig *config = nullptr;
};
} // namespace hf3fs::fuse

92
src/fuse/FuseConfig.h Normal file
View File

@@ -0,0 +1,92 @@
#pragma once
#include "client/meta/MetaClient.h"
#include "client/mgmtd/MgmtdClientForClient.h"
#include "client/storage/StorageClient.h"
#include "common/app/ApplicationBase.h"
#include "common/utils/ConfigBase.h"
#include "common/utils/CoroutinesPool.h"
namespace hf3fs::fuse {
// Configuration of the FUSE client. Items declared with CONFIG_HOT_UPDATED_ITEM use the
// hot-update variant of the config macro; the others use plain CONFIG_ITEM.
struct FuseConfig : public ConfigBase<FuseConfig> {
// With ENABLE_FUSE_APPLICATION the common application settings come from
// ApplicationBase::Config (cluster id, mountpoint, etc. then live in FuseLauncherConfig);
// otherwise they are declared directly here.
#ifdef ENABLE_FUSE_APPLICATION
CONFIG_OBJ(common, ApplicationBase::Config);
#else
CONFIG_ITEM(cluster_id, "");
CONFIG_ITEM(token_file, "");
CONFIG_ITEM(mountpoint, "");
CONFIG_ITEM(allow_other, true);
CONFIG_OBJ(ib_devices, net::IBDevice::Config);
CONFIG_OBJ(log, logging::LogConfig);
CONFIG_OBJ(monitor, monitor::Monitor::Config);
#endif
// enable_priority switches the io workers between own-queue-only and priority-aware dequeueing.
CONFIG_HOT_UPDATED_ITEM(enable_priority, false);
CONFIG_HOT_UPDATED_ITEM(enable_interrupt, false);
// Timeouts as doubles (presumably seconds, as handed to the FUSE kernel caches; confirm in FuseOps.cc).
CONFIG_HOT_UPDATED_ITEM(attr_timeout, (double)30);
CONFIG_HOT_UPDATED_ITEM(entry_timeout, (double)30);
CONFIG_HOT_UPDATED_ITEM(negative_timeout, (double)5);
CONFIG_HOT_UPDATED_ITEM(symlink_timeout, (double)5);
// readonly also disables the periodic sync scan.
CONFIG_HOT_UPDATED_ITEM(readonly, false);
CONFIG_HOT_UPDATED_ITEM(memset_before_read, false);
CONFIG_HOT_UPDATED_ITEM(enable_read_cache, true);
CONFIG_HOT_UPDATED_ITEM(fsync_length_hint, false); // for test
CONFIG_HOT_UPDATED_ITEM(fdatasync_update_length, false);
// Thread limits handed to the FUSE session loop; max_threads is additionally capped by
// the core count in FuseClients::init.
CONFIG_ITEM(max_idle_threads, 10);
CONFIG_ITEM(max_threads, 256);
CONFIG_ITEM(max_readahead, 16_MB);
CONFIG_ITEM(max_background, 32);
CONFIG_ITEM(enable_writeback_cache, false);
// Configs of the network, mgmtd, storage and meta clients created in FuseClients::init.
CONFIG_OBJ(client, net::Client::Config);
CONFIG_OBJ(mgmtd, client::MgmtdClientForClient::Config);
CONFIG_OBJ(storage, storage::client::StorageClient::Config);
CONFIG_OBJ(meta, meta::client::MetaClient::Config, [&](auto &cfg) { cfg.set_dynamic_stripe(true); });
CONFIG_ITEM(remount_prefix, (std::optional<std::string>)std::nullopt);
CONFIG_ITEM(iov_limit, 1_MB);
// Size of the normal-priority job queue and number of io worker coroutines.
CONFIG_ITEM(io_jobq_size, 1024);
CONFIG_ITEM(batch_io_coros, 128);
CONFIG_ITEM(rdma_buf_pool_size, 1024);
CONFIG_ITEM(time_granularity, 1_s);
CONFIG_HOT_UPDATED_ITEM(check_rmrf, true);
CONFIG_ITEM(notify_inval_threads, 32);
CONFIG_ITEM(max_uid, 1_M);
CONFIG_HOT_UPDATED_ITEM(chunk_size_limit, 0_KB);
// Sizes of the high- and low-priority job queues.
CONFIG_SECT(io_jobq_sizes, {
CONFIG_ITEM(hi, 32);
CONFIG_ITEM(lo, 4096);
});
// Number of io workers dedicated to high and low priority; the rest serve normal priority.
CONFIG_SECT(io_worker_coros, {
CONFIG_HOT_UPDATED_ITEM(hi, 8);
CONFIG_HOT_UPDATED_ITEM(lo, 8);
});
// How long a priority-aware worker waits on one queue before trying the next.
CONFIG_HOT_UPDATED_ITEM(io_job_deq_timeout, 1_ms);
CONFIG_OBJ(storage_io, storage::client::IoOptions);
// Timeout of the watcher threads' semaphore wait.
CONFIG_HOT_UPDATED_ITEM(submit_wait_jitter, 1_ms);
// Maximum number of jobs a watcher takes from one io ring per pass.
CONFIG_HOT_UPDATED_ITEM(max_jobs_per_ioring, 32);
// max_buf_size is used for the RDMA buffer pool and as the FUSE max_read option.
// Note that io_bufs.max_readahead is distinct from the top-level max_readahead above.
CONFIG_SECT(io_bufs, {
CONFIG_ITEM(max_buf_size, 1_MB);
CONFIG_ITEM(max_readahead, 256_KB);
CONFIG_ITEM(write_buf_size, 1_MB);
});
CONFIG_HOT_UPDATED_ITEM(flush_on_stat, true);
CONFIG_HOT_UPDATED_ITEM(sync_on_stat, true);
CONFIG_HOT_UPDATED_ITEM(dryrun_bench_mode, false);
// Settings of the periodic sync of dirty inodes (see FuseClients::periodicSyncScan):
// on/off switch, base interval, maximum number of inodes per round, and worker pool config.
struct PeriodSync : public ConfigBase<PeriodSync> {
CONFIG_HOT_UPDATED_ITEM(enable, true);
CONFIG_HOT_UPDATED_ITEM(interval, 30_s);
CONFIG_HOT_UPDATED_ITEM(limit, 1000u);
CONFIG_HOT_UPDATED_ITEM(flush_write_buf, true);
CONFIG_OBJ(worker, CoroutinesPoolBase::Config, [](auto &cfg) { cfg.set_coroutines_num(4); });
};
CONFIG_OBJ(periodic_sync, PeriodSync);
};
} // namespace hf3fs::fuse

View File

@@ -0,0 +1,19 @@
#include "FuseConfigFetcher.h"
#include <folly/experimental/coro/BlockingWait.h>
#include "common/utils/SysResource.h"
namespace hf3fs::fuse {
// Fills in `appInfo.tags` with the universal tags that mgmtd reports for this machine's
// physical hostname. Fails if the hostname cannot be determined, the mgmtd client cannot be
// initialized, or the tag query returns an error.
Result<Void> FuseConfigFetcher::completeAppInfo(flat::AppInfo &appInfo [[maybe_unused]]) {
  auto physicalHostname = SysResource::hostname(/*physicalMachineName=*/true);
  RETURN_ON_ERROR(physicalHostname);
  RETURN_ON_ERROR(ensureClientInited());

  auto fetchTags = [&]() -> CoTryTask<void> {
    auto universalTags = co_await mgmtdClient_->getUniversalTags(*physicalHostname);
    CO_RETURN_ON_ERROR(universalTags);
    appInfo.tags = std::move(*universalTags);
    co_return Void{};
  };
  return folly::coro::blockingWait(fetchTags());
}
} // namespace hf3fs::fuse

View File

@@ -0,0 +1,10 @@
#pragma once
#include "core/app/MgmtdClientFetcher.h"
namespace hf3fs::fuse {
// Remote config fetcher of the FUSE application; reuses MgmtdClientFetcher (including its
// constructors) and only customizes how the app info is completed.
struct FuseConfigFetcher : public core::launcher::MgmtdClientFetcher {
using core::launcher::MgmtdClientFetcher::MgmtdClientFetcher;
// Adds the universal tags of this host to `appInfo` (see FuseConfigFetcher.cc).
Result<Void> completeAppInfo(flat::AppInfo &appInfo) final;
};
} // namespace hf3fs::fuse

View File

@@ -0,0 +1,10 @@
#include "FuseLauncherConfig.h"
#include "common/app/ApplicationBase.h"
#include "common/app/Utils.h"
namespace hf3fs::fuse {
// Initializes the launcher config from the file at `filePath` by delegating to
// app_detail::initConfigFromFile; `dump` and `updates` are forwarded unchanged.
void FuseLauncherConfig::init(const String &filePath, bool dump, const std::vector<config::KeyValue> &updates) {
app_detail::initConfigFromFile(*this, filePath, dump, updates);
}
} // namespace hf3fs::fuse

View File

@@ -0,0 +1,24 @@
#pragma once
#include "client/mgmtd/MgmtdClientForClient.h"
#include "common/app/NodeId.h"
#include "common/net/Client.h"
#include "common/utils/ConfigBase.h"
namespace hf3fs::fuse {
// Launcher-stage config of the FUSE application: the settings needed before the full
// FuseConfig is available (cluster id, mount point, token file, network and mgmtd client).
struct FuseLauncherConfig : public ConfigBase<FuseLauncherConfig> {
CONFIG_ITEM(cluster_id, "");
CONFIG_OBJ(ib_devices, net::IBDevice::Config);
CONFIG_OBJ(client, net::Client::Config);
CONFIG_OBJ(mgmtd_client, client::MgmtdClientForClient::Config);
// mountpoint, allow_other and token_file are read by FuseApplication::Impl::initFuseClients.
CONFIG_ITEM(mountpoint, "");
CONFIG_ITEM(allow_other, true);
CONFIG_ITEM(token_file, "");
public:
using Base = ConfigBase<FuseLauncherConfig>;
// Keep the inherited init overloads visible next to the overload declared below.
using Base::init;
// Loads the config from a file (defined in FuseLauncherConfig.cc).
void init(const String &filePath, bool dump, const std::vector<config::KeyValue> &updates);
};
} // namespace hf3fs::fuse

107
src/fuse/FuseMainLoop.cc Normal file
View File

@@ -0,0 +1,107 @@
#include "FuseMainLoop.h"
#include <folly/ScopeGuard.h>
#include <folly/logging/xlog.h>
#include "FuseOps.h"
namespace hf3fs::fuse {
// Mounts the file system and runs the FUSE session loop until it ends.
//
// programName - used as argv[0] of the synthesized FUSE command line.
// allowOther  - adds the "allow_other" and "default_permissions" mount options.
// mountpoint  - directory to mount on.
// maxbufsize  - value of the "max_read" mount option.
// clusterId   - appended to the "fsname=hf3fs." mount option.
//
// Returns 0 on success (also for the help/version paths) and 1 on any failure.
// Cleanup actions are pushed onto a stack as each step succeeds and run in reverse order
// when the function exits.
int fuseMainLoop(const String &programName,
                 bool allowOther,
                 const String &mountpoint,
                 size_t maxbufsize,
                 const String &clusterId) {
  auto &clients = getFuseClientsInstance();
  const auto &ops = getFuseOps();

  std::stack<std::function<void()>> cleanupHooks;
  SCOPE_EXIT {
    for (; !cleanupHooks.empty(); cleanupHooks.pop()) {
      cleanupHooks.top()();
    }
  };

  // Build the argument list that libfuse parses.
  std::vector<std::string> fuseArgs;
  auto addMountOption = [&fuseArgs](const std::string &option) {
    fuseArgs.push_back("-o");
    fuseArgs.push_back(option);
  };
  fuseArgs.push_back(programName);
  if (allowOther) {
    addMountOption("allow_other");
    addMountOption("default_permissions");
  }
  addMountOption("auto_unmount");
  addMountOption(fmt::format("max_read={}", maxbufsize));
  fuseArgs.push_back(mountpoint);
  addMountOption("subtype=hf3fs");
  addMountOption("fsname=hf3fs." + clusterId);

  std::vector<char *> fuseArgv;
  for (auto &arg : fuseArgs) {
    fuseArgv.push_back(const_cast<char *>(arg.c_str()));
  }

  struct fuse_args args = FUSE_ARGS_INIT(static_cast<int>(fuseArgv.size()), fuseArgv.data());
  struct fuse_cmdline_opts opts;
  struct fuse_loop_config *loopConfig = fuse_loop_cfg_create();
  SCOPE_EXIT { fuse_loop_cfg_destroy(loopConfig); };

  if (fuse_parse_cmdline(&args, &opts) != 0) {
    return 1;
  }
  cleanupHooks.push([&] {
    free(opts.mountpoint);
    fuse_opt_free_args(&args);
  });

  if (opts.show_help) {
    printf("This is hf3fs fuse!\n");
    fuse_cmdline_help();
    fuse_lowlevel_help();
    return 0;
  }
  if (opts.show_version) {
    printf("What's my version?\n");
    fuse_lowlevel_version();
    return 0;
  }

  if (opts.mountpoint == nullptr) {
    printf("No mountpoint.\n");
    return 1;
  }

  clients.se = fuse_session_new(&args, &ops, sizeof(ops), nullptr);
  if (clients.se == nullptr) {
    return 1;
  }
  cleanupHooks.push([&] { fuse_session_destroy(clients.se); });

  if (fuse_set_signal_handlers(clients.se) != 0) {
    return 1;
  }
  cleanupHooks.push([&] { fuse_remove_signal_handlers(clients.se); });

  if (fuse_session_mount(clients.se, opts.mountpoint) != 0) {
    return 1;
  }
  cleanupHooks.push([&] { fuse_session_unmount(clients.se); });

  int loopResult = -1;
  if (opts.singlethread) {
    loopResult = fuse_session_loop(clients.se);
  } else {
    fuse_loop_cfg_set_clone_fd(loopConfig, opts.clone_fd);
    fuse_loop_cfg_set_idle_threads(loopConfig, clients.maxIdleThreads);
    fuse_loop_cfg_set_max_threads(loopConfig, clients.maxThreads);
    loopResult = fuse_session_loop_mt(clients.se, loopConfig);
  }

  return loopResult != 0 ? 1 : 0;
}
} // namespace hf3fs::fuse

11
src/fuse/FuseMainLoop.h Normal file
View File

@@ -0,0 +1,11 @@
#pragma once
#include "common/utils/String.h"
namespace hf3fs::fuse {
// Mounts the filesystem and runs the FUSE session loop.
// programName is used as argv[0] of the libfuse command line, allowOther toggles the
// allow_other/default_permissions mount options, maxbufsize is passed as max_read, and
// clusterId is appended to the fsname option.
// Returns 0 on success (or after printing help/version) and 1 on failure.
int fuseMainLoop(const String &programName,
bool allowOther,
const String &mountpoint,
size_t maxbufsize,
const String &clusterId);
}

2715
src/fuse/FuseOps.cc Normal file

File diff suppressed because it is too large Load Diff

8
src/fuse/FuseOps.h Normal file
View File

@@ -0,0 +1,8 @@
#pragma once
#include "FuseClients.h"
namespace hf3fs::fuse {
// Returns a reference to the FuseClients instance used by the FUSE ops and the main loop.
// NOTE(review): presumably a process-wide singleton defined in FuseOps.cc — confirm.
FuseClients &getFuseClientsInstance();
// Returns the low-level FUSE operation table that fuseMainLoop passes to fuse_session_new.
const fuse_lowlevel_ops &getFuseOps();
} // namespace hf3fs::fuse

285
src/fuse/IoRing.cc Normal file
View File

@@ -0,0 +1,285 @@
#include "IoRing.h"
#include <optional>
#include <type_traits>
#include <utility>
#include "PioV.h"
#include "common/utils/UtcTime.h"
#include "fbs/meta/Schema.h"
#include "fuse/FuseClients.h"
#include "fuse/FuseOps.h"
#include "lib/api/hf3fs_usrbio.h"
namespace hf3fs::fuse {
/// Claims pending submission entries and groups them into at most `maxJobs` jobs.
///
/// Batching depends on `ioDepth`:
///  - ioDepth > 0: every job has exactly `ioDepth` entries; nothing is claimed unless a full
///    batch is pending and there is room for its completions.
///  - ioDepth == 0: one job takes everything that is pending and fits in the completion queue.
///  - ioDepth < 0: jobs have at most `-ioDepth` entries; a partial batch is held back until
///    `timeout` has elapsed since it was first observed (when `timeout` is non-zero).
///
/// Claimed ranges are recorded in `sqeProcTails_` and counted in `processing_` under
/// `cqeMtx_`. Returns the claimed jobs, possibly none.
std::vector<IoRingJob> IoRing::jobsToProc(int maxJobs) {
  std::vector<IoRingJob> jobs;
  std::lock_guard lock(cqeMtx_);
  auto spt = sqeProcTail_;
  auto sqes = sqeCount();
  // completion slots still free once everything already in flight has been reported
  auto cqeAvail = entries - 1 - processing_ - cqeCount();
  while (sqes && (int)jobs.size() < maxJobs) {
    int toProc;
    if (ioDepth > 0) {
      toProc = ioDepth;
      if (toProc > sqes || toProc > cqeAvail) {  // even if we finish the io, we got no place to store the results
        break;
      }
    } else {
      toProc = std::min(sqes, cqeAvail);
      if (ioDepth < 0) {
        auto iod = -ioDepth;
        if (toProc > iod) {
          toProc = iod;
        } else if (toProc < iod && timeout.count()) {
          // Read the clock without touching lastCheck_: it must keep the time the partial
          // batch was first seen, otherwise the timeout below could never expire.
          auto now = SteadyClock::now();
          if (!lastCheck_) {  // first time to find the (not enough) ios, wait till timeout
            lastCheck_ = now;
            break;
          } else if (*lastCheck_ + timeout > now) {  // ios not enough to fill a batch, and time has not run out
            break;
          }
        }
        // a batch is being issued, so the wait (if any) is over
        lastCheck_ = std::nullopt;
      }
    }
    if (jobs.empty()) {
      jobs.reserve(ioDepth ? std::min(maxJobs, sqes / abs(ioDepth) + 1) : 1);
    }
    jobs.emplace_back(IoRingJob{shared_from_this(), spt, toProc});
    spt = (spt + toProc) % entries;
    sqeProcTails_.push_back(spt);
    processing_ += toProc;
    sqes -= toProc;
    cqeAvail -= toProc;
  }
  sqeProcTail_ = spt;
  return jobs;
}
// Executes one claimed job: the `toProc` submission entries starting at ring position `spt`.
//
// For each entry it resolves the inode and the shared-memory buffer through the supplied
// lookup callbacks, queues a read or write on a PioV, runs the batch against the storage
// client, and finally publishes one completion entry per submission entry.
//
// spt / toProc:    start position and length of the job, as produced by jobsToProc().
// storageClient:   client used by PioV to run the I/O.
// storageIo:       supplies the read and write options for the batch.
// userConfig:      per-user configuration; provides readonly() and chunk_size_limit().
// lookupFiles:     appends one inode pointer per sqe to its output vector.
// lookupBufs:      appends one buffer result per sqe to its output vector.
//
// Per-entry results are byte counts, or negated status codes that are translated with
// StatusCode::toErrno before being written to the completion queue.
CoTask<void> IoRing::process(
int spt,
int toProc,
storage::client::StorageClient &storageClient,
const storage::client::IoOptions &storageIo,
UserConfig &userConfig,
std::function<void(std::vector<std::shared_ptr<RcInode>> &, const IoArgs *, const IoSqe *, int)> &&lookupFiles,
std::function<void(std::vector<Result<lib::ShmBufForIO>> &, const IoArgs *, const IoSqe *, int)> &&lookupBufs) {
// Function-local statics: constructed once, on the first call, so the mount_name tag is taken
// from whichever ring reaches this point first.
static monitor::LatencyRecorder overallLatency("usrbio.piov.overall", monitor::TagSet{{"mount_name", mountName}});
static monitor::LatencyRecorder prepareLatency("usrbio.piov.prepare", monitor::TagSet{{"mount_name", mountName}});
static monitor::LatencyRecorder submitLatency("usrbio.piov.submit", monitor::TagSet{{"mount_name", mountName}});
static monitor::LatencyRecorder completeLatency("usrbio.piov.complete", monitor::TagSet{{"mount_name", mountName}});
static monitor::DistributionRecorder ioSizeDist("usrbio.piov.io_size", monitor::TagSet{{"mount_name", mountName}});
static monitor::DistributionRecorder ioDepthDist("usrbio.piov.io_depth", monitor::TagSet{{"mount_name", mountName}});
static monitor::DistributionRecorder totalBytesDist("usrbio.piov.total_bytes",
monitor::TagSet{{"mount_name", mountName}});
static monitor::DistributionRecorder distinctFilesDist("usrbio.piov.distinct_files",
monitor::TagSet{{"mount_name", mountName}});
static monitor::DistributionRecorder distinctBufsDist("usrbio.piov.distinct_bufs",
monitor::TagSet{{"mount_name", mountName}});
static monitor::CountRecorder bwCount("usrbio.piov.bw", monitor::TagSet{{"mount_name", mountName}});
auto start = SteadyClock::now(), overallStart = start;
std::string ioType = forRead_ ? "read" : "write";
auto uids = std::to_string(userInfo_.uid.toUnderType());
auto &config = userConfig.getConfig(userInfo_);
std::vector<ssize_t> res;
if (!forRead_ && config.readonly()) {
// Write ring on a read-only configuration: fail every entry without touching storage.
res = std::vector<ssize_t>(toProc, static_cast<ssize_t>(-StatusCode::kReadOnlyMode));
} else {
res = std::vector<ssize_t>(toProc, 0);
size_t iod = 0, totalBytes = 0;
std::set<uint64_t> distinctFiles;
std::set<Uuid> distinctBufs;
// The job may wrap around the end of the ring: look up the part up to the end of the ring
// first, then the remainder from the ring start if the first call returned too few.
std::vector<std::shared_ptr<RcInode>> inodes;
inodes.reserve(toProc);
lookupFiles(inodes, ringSection, sqeSection + spt, std::min(toProc, entries - spt));
if ((int)inodes.size() < toProc) {
lookupFiles(inodes, ringSection, sqeSection, toProc - (int)inodes.size());
}
std::vector<Result<lib::ShmBufForIO>> bufs;
bufs.reserve(toProc);
lookupBufs(bufs, ringSection, sqeSection + spt, std::min(toProc, entries - spt));
if ((int)bufs.size() < toProc) {
lookupBufs(bufs, ringSection, sqeSection, toProc - (int)bufs.size());
}
// PioV writes per-entry results straight into res.
lib::agent::PioV ioExec(storageClient, config.chunk_size_limit(), res);
std::vector<uint64_t> truncateVers;
if (!forRead_) {
truncateVers.resize(toProc, 0);
}
// Prepare phase: validate each entry and queue it on ioExec; failures are recorded in res[i]
// and the entry is skipped.
for (int i = 0; i < toProc; ++i) {
auto idx = (spt + i) % entries;
auto sqe = sqeSection[idx];
const auto &args = ringSection[sqe.index];
++iod;
totalBytes += args.ioLen;
distinctFiles.insert(args.fileIid);
Uuid id;
memcpy(id.data, args.bufId, sizeof(id.data));
distinctBufs.insert(id);
ioSizeDist.addSample(args.ioLen, monitor::TagSet{{"io", ioType}, {"uid", uids}});
if (!inodes[i]) {
res[i] = -static_cast<ssize_t>(MetaCode::kNotFile);
continue;
}
if (!bufs[i]) {
res[i] = -static_cast<ssize_t>(bufs[i].error().code());
continue;
}
auto memh = co_await bufs[i]->memh(args.ioLen);
if (!memh) {
res[i] = -static_cast<ssize_t>(memh.error().code());
continue;
} else if (!bufs[i]->ptr() || !*memh) {
XLOGF(ERR, "{} is null when doing usrbio", *memh ? "buf ptr" : "memh");
res[i] = -static_cast<ssize_t>(ClientAgentCode::kIovShmFail);
continue;
}
if (!forRead_) {
// Writes are bracketed by beginWrite/finishWrite on the inode; the value returned by
// beginWrite is kept and handed back to finishWrite after the batch.
auto beginWrite =
co_await inodes[i]->beginWrite(userInfo_, *getFuseClientsInstance().metaClient, args.fileOff, args.ioLen);
if (beginWrite.hasError()) {
res[i] = -static_cast<ssize_t>(beginWrite.error().code());
continue;
}
truncateVers[i] = *beginWrite;
}
auto addRes = forRead_
? ioExec.addRead(i, inodes[i]->inode, 0, args.fileOff, args.ioLen, bufs[i]->ptr(), **memh)
: ioExec.addWrite(i, inodes[i]->inode, 0, args.fileOff, args.ioLen, bufs[i]->ptr(), **memh);
if (!addRes) {
res[i] = -static_cast<ssize_t>(addRes.error().code());
}
}
auto now = SteadyClock::now();
prepareLatency.addSample(now - start, monitor::TagSet{{"io", ioType}, {"uid", uids}});
start = now;
ioDepthDist.addSample(iod, monitor::TagSet{{"io", ioType}, {"uid", uids}});
totalBytesDist.addSample(totalBytes, monitor::TagSet{{"io", ioType}, {"uid", uids}});
distinctFilesDist.addSample(distinctFiles.size(), monitor::TagSet{{"io", ioType}, {"uid", uids}});
distinctBufsDist.addSample(distinctBufs.size(), monitor::TagSet{{"io", ioType}, {"uid", uids}});
// Submit phase: run the whole batch.
auto readOpt = storageIo.read();
if (flags_ & HF3FS_IOR_ALLOW_READ_UNCOMMITTED) {
readOpt.set_allowReadUncommitted(true);
}
auto execRes = co_await (forRead_ ? ioExec.executeRead(userInfo_, readOpt)
: ioExec.executeWrite(userInfo_, storageIo.write()));
now = SteadyClock::now();
submitLatency.addSample(now - start, monitor::TagSet{{"io", ioType}, {"uid", uids}});
start = now;
if (!execRes) {
// Batch-level failure: every entry that has not already failed gets the batch error.
for (auto &r : res) {
if (r >= 0) {
r = -static_cast<ssize_t>(execRes.error().code());
}
}
} else {
ioExec.finishIo(!(flags_ & HF3FS_IOR_FORBID_READ_HOLES));
}
if (!forRead_) {
// Called for every entry that has an inode, including entries that failed before
// beginWrite succeeded (their truncateVers slot is still 0).
for (int i = 0; i < toProc; ++i) {
auto &inode = inodes[i];
if (!inode) {
continue;
}
auto sqe = sqeSection[(spt + i) % entries];
auto off = ringSection[sqe.index].fileOff;
auto r = res[i];
inode->finishWrite(userInfo_.uid, truncateVers[i], off, r);
}
}
}
auto newSpt = (spt + toProc) % entries;
// Copy the sqes out before sqeTail may be advanced below.
// NOTE(review): presumably because the submitter may reuse the slots once sqeTail moves — confirm.
std::vector<IoSqe> sqes(toProc);
for (int i = 0; i < toProc; ++i) {
sqes[i] = sqeSection[(spt + i) % entries];
}
{
// lock for between threads (io workers)
// atomics for between processes (io worker & io generator)
std::lock_guard lock(cqeMtx_);
if (sqeProcTails_.empty()) {
XLOGF(FATAL, "bug?! sqeProcTails_ is empty");
}
// Jobs may finish out of order. sqeTail only advances when the oldest claimed job is done;
// a job that finishes early parks its tail in sqeDoneTails_ until the older ones catch up.
if (sqeProcTails_.front() != newSpt) {
sqeDoneTails_.insert(newSpt);
} else {
sqeTail = newSpt;
sqeProcTails_.pop_front();
while (!sqeDoneTails_.empty()) {
if (sqeProcTails_.empty()) {
XLOGF(FATAL, "bug?! sqeProcTails_ is empty");
}
auto first = sqeProcTails_.front();
auto it = sqeDoneTails_.find(first);
if (it == sqeDoneTails_.end()) {
break;
} else {
sqeTail = first;
sqeProcTails_.pop_front();
sqeDoneTails_.erase(it);
}
}
}
// Publish completions; negative results are converted to negated errno values.
for (int i = 0; i < toProc; ++i) {
auto &sqe = sqes[i];
auto r = res[i];
auto addRes = addCqe(sqe.index, r >= 0 ? r : -static_cast<ssize_t>(StatusCode::toErrno(-r)), sqe.userdata);
if (!addRes) {
XLOGF(FATAL, "failed to add cqe");
}
}
processing_ -= toProc;
}
// Post the completion semaphore once per job.
sem_post(cqeSem.get());
size_t doneBytes = 0;
for (auto r : res) {
if (r > 0) {
doneBytes += r;
}
}
bwCount.addSample(doneBytes, monitor::TagSet{{"io", ioType}, {"uid", uids}});
auto now = SteadyClock::now();
completeLatency.addSample(now - start, monitor::TagSet{{"io", ioType}, {"uid", uids}});
overallLatency.addSample(now - overallStart, monitor::TagSet{{"io", ioType}, {"uid", uids}});
}
} // namespace hf3fs::fuse

279
src/fuse/IoRing.h Normal file
View File

@@ -0,0 +1,279 @@
#pragma once
#include <cstdint>
#include <semaphore.h>
#include "IovTable.h"
#include "UserConfig.h"
#include "client/storage/StorageClient.h"
#include "common/utils/AtomicSharedPtrTable.h"
#include "common/utils/Coroutine.h"
#include "common/utils/Uuid.h"
#include "fbs/meta/Schema.h"
#include "lib/common/Shm.h"
namespace hf3fs::fuse {
struct RcInode;
// Argument record of one I/O request. IoRing::ringSection is an array of these, indexed by
// IoSqe::index. The records live in the ring's shared buffer, so the layout must not change.
struct IoArgs {
// 16-byte id of the I/O buffer; IoRing::process copies it into a Uuid.
uint8_t bufId[16];
// NOTE(review): presumably the offset of the data inside that buffer — not read in the code visible here.
size_t bufOff;
// Id of the target file. NOTE(review): presumably the inode id — confirm against the lookup callbacks.
uint64_t fileIid;
// File offset and byte length of the I/O, as handed to PioV::addRead / addWrite.
size_t fileOff;
uint64_t ioLen;
// NOTE(review): opaque caller value; IoRing::process reads the copy stored in IoSqe, not this one.
const void *userdata;
};
// Submission queue entry: refers to the IoArgs record to execute.
struct IoSqe {
// Index into IoRing::ringSection.
int32_t index;
// Opaque value copied unchanged into the matching completion entry.
const void *userdata;
};
// Completion queue entry written by IoRing::addCqe.
struct IoCqe {
// Index of the IoArgs record this completion belongs to.
int32_t index;
// Not written by addCqe.
int32_t reserved;
// Result passed to addCqe; IoRing::process passes a byte count or a negated errno value.
int64_t result;
// Opaque value copied from the submission entry.
const void *userdata;
};
class IoRing;
// One unit of work produced by IoRing::jobsToProc: a contiguous (modulo ring size) range of
// submission entries of a single ring.
struct IoRingJob {
// Ring the entries belong to; kept alive by this shared pointer.
std::shared_ptr<IoRing> ior;
// Ring position of the first entry in the range.
int sqeProcTail;
// Number of entries in the range.
int toProc;
};
// we allow multiple io workers to process the same ioring, but different ranges
// so 1 ioring can be used to submit ios processed in parallel
// however, we don't allow multiple threads to prepare ios in the same ioring
// or batches may be mixed and things may get ugly
/// A submission/completion ring laid out in a caller-provided buffer.
///
/// Buffer layout, in order: four head/tail markers (sqe head, sqe tail, cqe head, cqe tail),
/// each `ringMarkerSize()` bytes apart; `entries` IoArgs records; `entries` IoCqe records;
/// `entries` IoSqe records; one `sem_t`. One of the `entries` slots always stays unused so
/// that a full queue can be told apart from an empty one.
class IoRing : public std::enable_shared_from_this<IoRing> {
 public:
  /// Size of one head/tail marker: 4 bytes rounded up to the alignment atomic_ref requires.
  static int ringMarkerSize() {
    auto n = std::atomic_ref<int32_t>::required_alignment;
    return (4 + n - 1) / n * n;
  }
  // allocate 1 more slot for queue emptiness/fullness checking
  /// Number of usable entries a buffer of `bufSize` bytes can hold.
  /// Returns -1 when the buffer cannot even hold the fixed overhead.
  static int ioRingEntries(size_t bufSize) {
    auto n = ringMarkerSize();
    // n * 4 for sqe/cqe head/tail markers
    const size_t overhead = 4096 + (size_t)n * 4 + sizeof(sem_t);
    if (bufSize < overhead) {
      // The subtraction below is unsigned: without this guard a tiny buffer would wrap around
      // and report close to INT_MAX entries.
      return -1;
    }
    return (int)std::min((size_t)std::numeric_limits<int>::max(),
                         (bufSize - overhead) / (sizeof(IoArgs) + sizeof(IoCqe) + sizeof(IoSqe))) -
           1;
  }
  /// Buffer size needed for `entries` usable entries; counterpart of ioRingEntries().
  static size_t bytesRequired(int entries) {
    auto n = ringMarkerSize();
    // n * 4 for sqe/cqe head/tail markers
    return n * 4 + sizeof(sem_t) + (sizeof(IoArgs) + sizeof(IoCqe) + sizeof(IoSqe)) * (entries + 1) + 4096;
  }

 public:
  using std::enable_shared_from_this<IoRing>::shared_from_this;
  // the shm arg is used to keep it from being destroyed when the iov link is removed
  /// Maps the ring structures onto `buf` (`size` bytes).
  /// `iod` is the batching mode (see jobsToProc), `to` the partial-batch timeout and `owner`
  /// selects whether the semaphore inside the buffer is initialized here.
  IoRing(std::shared_ptr<lib::ShmBuf> shm,
         std::string_view nm,
         const meta::UserInfo &ui,
         bool read,
         uint8_t *buf,
         size_t size,
         int iod,
         int prio,
         Duration to,
         uint64_t flags,
         bool owner = true)
      : name(nm),
        entries(ioRingEntries(size) + 1),
        ioDepth(iod),
        priority(prio),
        timeout(to),
        sqeHead_((int32_t *)buf),
        sqeTail_((int32_t *)(buf + ringMarkerSize())),
        cqeHead_((int32_t *)(buf + ringMarkerSize() * 2)),
        cqeTail_((int32_t *)(buf + ringMarkerSize() * 3)),
        sqeHead(*sqeHead_),
        sqeTail(*sqeTail_),
        cqeHead(*cqeHead_),
        cqeTail(*cqeTail_),
        ringSection((IoArgs *)(buf + ringMarkerSize() * 4)),
        cqeSection((IoCqe *)(ringSection + entries)),
        sqeSection((IoSqe *)(cqeSection + entries)),
        slots(entries - 1),
        shm_(std::move(shm)),
        userInfo_(ui),
        forRead_(read),
        flags_(flags) {
    // The semaphore sits right after the sqe section. Its end is computed in bytes: adding
    // sizeof(sem_t) to the IoSqe pointer would scale it by sizeof(IoSqe). A ring without any
    // slot (buffer too small) is rejected here as well.
    XLOGF_IF(FATAL,
             entries <= 0 || (uintptr_t)(sqeSection + entries) + sizeof(sem_t) > (uintptr_t)(buf + size),
             "sem has a bad address {}, after whole shm starts at {} with {} bytes",
             (void *)(sqeSection + entries),
             (void *)buf,
             size);
    auto sem = (sem_t *)(sqeSection + entries);
    if (owner) {
      sem_init(sem, 1, 0);
    }
    cqeSem.reset(sem);
  }
  std::vector<IoRingJob> jobsToProc(int maxJobs);
  /// Number of completion entries published but not yet consumed.
  int cqeCount() const { return (cqeHead.load() + entries - cqeTail.load()) % entries; }
  CoTask<void> process(
      int spt,
      int toProc,
      storage::client::StorageClient &storageClient,
      const storage::client::IoOptions &storageIo,
      UserConfig &userConfig,
      std::function<void(std::vector<std::shared_ptr<RcInode>> &, const IoArgs *, const IoSqe *, int)> &&lookupFiles,
      std::function<void(std::vector<Result<lib::ShmBufForIO>> &, const IoArgs *, const IoSqe *, int)> &&lookupBufs);

 public:
  /// Appends a submission entry referring to IoArgs record `idx`.
  /// Returns false when the submission queue is full.
  bool addSqe(int idx, const void *userdata) {
    auto h = sqeHead.load();
    if ((h + 1) % entries == sqeTail.load()) {
      return false;
    }
    auto &sqe = sqeSection[h];
    sqe.index = idx;
    sqe.userdata = userdata;
    // publish the entry only after it has been filled in
    sqeHead.store((h + 1) % entries);
    return true;
  }
  /// Whether ring position `a` is logically after position `b`, taking wrap-around relative
  /// to the current submission head into account.
  bool sqeTailAfter(int a, int b) {
    auto h = sqeHead.load();
    if (a == h) {  // caught up with head, must be the last
      return true;
    }
    auto ah = a > h, bh = b > h;
    if (ah == bh) {  // both after or before head, bigger is after
      return a > b;
    } else {  // the one before head is after
      return bh;
    }
  }

 public:
  std::string name;
  std::string mountName;
  int entries;  // total slots, including the one kept free
  int ioDepth;
  int priority;
  Duration timeout;

 private:
  // raw marker addresses inside the buffer; the atomic_refs below wrap them
  int32_t *sqeHead_;
  int32_t *sqeTail_;
  int32_t *cqeHead_;
  int32_t *cqeTail_;
  // when a partial batch was first seen by jobsToProc, if one is currently being held back
  std::optional<SteadyTime> lastCheck_;

 public:
  std::atomic_ref<int32_t> sqeHead;
  std::atomic_ref<int32_t> sqeTail;
  std::atomic_ref<int32_t> cqeHead;
  std::atomic_ref<int32_t> cqeTail;
  IoArgs *ringSection;
  IoCqe *cqeSection;
  IoSqe *sqeSection;
  std::unique_ptr<sem_t, std::function<void(sem_t *)>> cqeSem{nullptr, [](sem_t *p) { sem_destroy(p); }};

 public:
  AvailSlots slots;

 private:
  /// Submission entries published but not yet claimed by jobsToProc.
  int sqeCount() const { return (sqeHead.load() + entries - sqeProcTail_) % entries; }
  /// Appends a completion entry; returns false when the completion queue is full.
  [[nodiscard]] bool addCqe(int idx, ssize_t res, const void *userdata) {
    auto h = cqeHead.load();
    if ((h + 1) % entries == cqeTail.load()) {
      return false;
    }
    auto &cqe = cqeSection[h];
    cqe.index = idx;
    cqe.result = res;
    cqe.userdata = userdata;
    cqeHead.store((h + 1) % entries);
    return true;
  }

 private:  // for fuse
  std::shared_ptr<lib::ShmBuf> shm_;
  meta::UserInfo userInfo_;
  bool forRead_;
  uint64_t flags_;
  std::mutex cqeMtx_;  // when reporting cqes
  int sqeProcTail_{0};
  int processing_{0};
  std::deque<int> sqeProcTails_;  // tails claimed and processing
  std::set<int> sqeDoneTails_;    // tails done processing
};
struct IoRingTable {
void init(int cap) {
for (int prio = 0; prio <= 2; ++prio) {
auto sp = "/" + semOpenPath(prio);
sems.emplace_back(sem_open(sp.c_str(), O_CREAT, 0666, 0), [sp](sem_t *p) {
sem_close(p);
sem_unlink(sp.c_str());
});
chmod(semPath(prio).c_str(), 0666);
}
ioRings = std::make_unique<AtomicSharedPtrTable<IoRing>>(cap);
}
Result<int> addIoRing(const Path &mountName,
std::shared_ptr<lib::ShmBuf> shm,
std::string_view name,
const meta::UserInfo &ui,
bool forRead,
uint8_t *buf,
size_t size,
int ioDepth,
const hf3fs::lib::IorAttrs &attrs) {
auto idxRes = ioRings->alloc();
if (!idxRes) {
return makeError(ClientAgentCode::kTooManyOpenFiles, "too many io rings");
}
auto idx = *idxRes;
auto ior = std::make_shared<
IoRing>(std::move(shm), name, ui, forRead, buf, size, ioDepth, attrs.priority, attrs.timeout, attrs.flags);
ior->mountName = mountName.native();
ioRings->table[idx].store(ior);
return idx;
}
void rmIoRing(int idx) { ioRings->remove(idx); }
std::vector<std::unique_ptr<sem_t, std::function<void(sem_t *)>>> sems;
std::unique_ptr<AtomicSharedPtrTable<IoRing>> ioRings;
private:
static std::string semOpenPath(int prio) {
static std::vector<Uuid> semIds{Uuid::random(), Uuid::random(), Uuid::random()};
return fmt::format("hf3fs-submit-ios.{}", semIds[prio].toHexString());
}
public:
static std::string semName(int prio) {
return fmt::format("submit-ios{}", prio == 1 ? "" : prio == 0 ? ".ph" : ".pl");
}
static Path semPath(int prio) { return Path("/dev/shm") / ("sem." + semOpenPath(prio)); }
static meta::Inode lookupSem(int prio) {
static const std::vector<meta::Inode> inodes{
{meta::InodeId{meta::InodeId::iovDir().u64() - 1},
meta::InodeData{meta::Symlink{semPath(0)}, meta::Acl{meta::Uid{0}, meta::Gid{0}, meta::Permission{0666}}}},
{meta::InodeId{meta::InodeId::iovDir().u64() - 2},
meta::InodeData{meta::Symlink{semPath(1)}, meta::Acl{meta::Uid{0}, meta::Gid{0}, meta::Permission{0666}}}},
{meta::InodeId{meta::InodeId::iovDir().u64() - 3},
meta::InodeData{meta::Symlink{semPath(2)}, meta::Acl{meta::Uid{0}, meta::Gid{0}, meta::Permission{0666}}}}};
return inodes[prio];
}
};
} // namespace hf3fs::fuse

337
src/fuse/IovTable.cc Normal file
View File

@@ -0,0 +1,337 @@
#include "IovTable.h"
#include <folly/experimental/coro/BlockingWait.h>
#include "IoRing.h"
#include "fbs/meta/Common.h"
namespace hf3fs::fuse {
using hf3fs::lib::IorAttrs;
const Path linkPref = "/dev/shm";
void IovTable::init(const Path &mount, int cap) {
mountName = mount.native();
iovs = std::make_unique<AtomicSharedPtrTable<lib::ShmBuf>>(cap);
}
// Attributes decoded from an iov key by parseKey().
struct IovAttrs {
// Uuid taken from the first dot-separated component of the key.
Uuid id;
// Value of the 'b' component; 0 when the key has none.
size_t blockSize = 0;
// Set when the key has an 'r' or 'w' component.
bool isIoRing = false;
// True for an 'r' component, false for 'w'.
bool forRead = true;
// Number following the 'r' / 'w' component.
int ioDepth = 0;
// Ring attributes; created when a 't', 'f' or 'p' component is present.
std::optional<IorAttrs> iora;
};
/// Decodes an iov key of the form "<uuid>[.<tag><value>]...".
///
/// Recognized tags: 'b' block size (must be > 0), 'r'/'w' io ring for reading/writing with
/// the io depth as value, 't' job check timeout in milliseconds (must be >= 0), 'f' io exec
/// flags written in base 2, 'p' priority ('l' -> 2, 'h' -> 0, 'n' or nothing -> 1).
/// Components with any other tag are ignored.
/// Returns kInvalidArg for malformed values and when ring attributes appear without 'r'/'w'.
static Result<IovAttrs> parseKey(const char *key) {
  std::vector<std::string> tokens;
  folly::split('.', key, tokens);

  auto uuid = Uuid::fromHexString(tokens[0]);
  RETURN_ON_ERROR(uuid);

  IovAttrs attrs;
  attrs.id = *uuid;

  // ring attributes are created on first use
  auto ringAttrs = [&attrs]() -> IorAttrs & {
    if (!attrs.iora) {
      attrs.iora = IorAttrs{};
    }
    return *attrs.iora;
  };

  for (size_t n = 1; n < tokens.size(); ++n) {
    const std::string &token = tokens[n];
    const char tag = token[0];
    if (tag == 'b') {  // block size
      const auto blockSize = atoll(token.c_str() + 1);
      if (blockSize <= 0) {
        return makeError(StatusCode::kInvalidArg, "invalid block size set in shm key");
      }
      attrs.blockSize = (size_t)blockSize;
    } else if (tag == 'r' || tag == 'w') {  // is io ring
      attrs.ioDepth = atoll(token.c_str() + 1);
      attrs.isIoRing = true;
      attrs.forRead = tag == 'r';
    } else if (tag == 't') {
      auto &iora = ringAttrs();
      const auto millis = atoi(token.c_str() + 1);
      if (millis < 0) {
        return makeError(StatusCode::kInvalidArg, "invalid io job check timeout {}", token.c_str() + 1);
      }
      iora.timeout = Duration(std::chrono::nanoseconds((uint64_t)millis * 1000000));
    } else if (tag == 'f') {
      auto &iora = ringAttrs();
      char *end;
      const auto flags = strtoull(token.c_str() + 1, &end, 2);
      if (*end != 0) {  // trailing characters that are not binary digits
        return makeError(StatusCode::kInvalidArg, "invalid io exec flags {}", token.c_str() + 1);
      }
      iora.flags = flags;
    } else if (tag == 'p') {  // should be io ring, priority
      auto &iora = ringAttrs();
      const char level = token.c_str()[1];
      if (level == 'l') {
        iora.priority = 2;
      } else if (level == 'h') {
        iora.priority = 0;
      } else if (level == 'n' || level == '\0') {
        iora.priority = 1;
      } else {
        return makeError(StatusCode::kInvalidArg, "invalid priority set in shm key");
      }
    }
  }

  if (attrs.iora && !attrs.isIoRing) {
    return makeError(StatusCode::kInvalidArg, "ioring attrs set for non-ioring");
  }
  return attrs;
}
constexpr int iovIidStart = meta::InodeId::iovIidStart;
/// Maps an inode id back to its iov descriptor.
/// Valid ids are negative when read as signed and lie between
/// iovDir - INT_MAX and iovDir - iovIidStart; anything else yields nullopt.
std::optional<int> IovTable::iovDesc(meta::InodeId iid) {
  const auto dirId = (ssize_t)meta::InodeId::iovDir().u64();
  const auto id = (ssize_t)iid.u64();
  const bool isIovId = id < 0 && id <= dirId - iovIidStart && id >= dirId - std::numeric_limits<int>::max();
  if (isIovId) {
    return dirId - id - iovIidStart;
  }
  return std::nullopt;
}
// Registers the shared-memory file at `shmPath` as an iov described by `key`.
//
// key:     iov key, decoded by parseKey() (uuid plus optional block size / io ring attributes).
// shmPath: path of the shm file; it must be an existing regular file.
// pid, ui: recorded on the ShmBuf as its pid and owning user.
// exec, sc: passed to ShmBuf::registerForIO for buffers that are not io rings.
//
// Returns the iov's symlink inode together with the ShmBuf when the iov is an io ring, or
// with a null pointer otherwise. Fails with kInvalidArg for a bad key, path or size, with
// kTooManyOpenFiles when the table is full, and with kIovShmFail when constructing the
// ShmBuf throws std::runtime_error.
Result<std::pair<meta::Inode, std::shared_ptr<lib::ShmBuf>>> IovTable::addIov(const char *key,
const Path &shmPath,
pid_t pid,
const meta::UserInfo &ui,
folly::Executor::KeepAlive<> exec,
storage::client::StorageClient &sc) {
// Function-local statics: constructed once, so the mount_name tag reflects the mountName of
// the table that makes the first call.
static monitor::DistributionRecorder mapTimesCount("fuse.iov.times", monitor::TagSet{{"mount_name", mountName}});
static monitor::DistributionRecorder mapBytesDist("fuse.iov.bytes", monitor::TagSet{{"mount_name", mountName}});
static monitor::CountRecorder shmSizeCount("fuse.iov.total_bytes", monitor::TagSet{{"mount_name", mountName}}, false);
static monitor::LatencyRecorder allocLatency("fuse.iov.latency.map", monitor::TagSet{{"mount_name", mountName}});
static monitor::DistributionRecorder ibRegBytesDist("fuse.iov.bytes.ib_reg",
monitor::TagSet{{"mount_name", mountName}});
static monitor::LatencyRecorder ibRegLatency("fuse.iov.latency.ib_reg", monitor::TagSet{{"mount_name", mountName}});
auto iovaRes = parseKey(key);
RETURN_ON_ERROR(iovaRes);
// The shm is opened by its path relative to /dev/shm, with a leading '/'.
Path shmOpenPath("/");
shmOpenPath /= shmPath.lexically_relative(linkPref);
struct stat st;
if (stat(shmPath.c_str(), &st) == -1 || !S_ISREG(st.st_mode)) {
return makeError(StatusCode::kInvalidArg, "failed to stat shm path or it's not a regular file");
}
// Validate the key against the real file size.
if (iovaRes->blockSize > (size_t)st.st_size) {
return makeError(StatusCode::kInvalidArg, "invalid block size set in shm key");
} else if (iovaRes->isIoRing && iovaRes->ioDepth > IoRing::ioRingEntries((size_t)st.st_size)) {
return makeError(StatusCode::kInvalidArg, "invalid io batch size set in shm key");
}
// Every path through the loop body returns, so the body runs exactly once.
while (true) {
auto iovdRes = iovs->alloc();
if (!iovdRes) {
return makeError(ClientAgentCode::kTooManyOpenFiles, "too many iovs allocated");
}
auto iovd = *iovdRes;
// Give the slot back on every exit path except the successful one.
bool dealloc = true;
SCOPE_EXIT {
if (dealloc) {
iovs->dealloc(iovd);
}
};
auto start = SteadyClock::now();
auto uids = std::to_string(ui.uid.toUnderType());
std::shared_ptr<lib::ShmBuf> shm;
try {
// Custom deleter: deregister from IO, unmap, record the "free" metrics, then delete.
shm.reset(
new lib::ShmBuf(shmOpenPath, 0, st.st_size, iovaRes->blockSize, iovaRes->id),
[uids,
&shmSizeCount = shmSizeCount,
&mapTimesCount = mapTimesCount,
&mapBytesDist = mapBytesDist,
&allocLatency = allocLatency,
&ibRegLatency = ibRegLatency](auto p) {
auto start = SteadyClock::now();
folly::coro::blockingWait(p->deregisterForIO());
auto now = SteadyClock::now();
ibRegLatency.addSample(now - start, monitor::TagSet{{"instance", "dereg"}, {"uid", uids}});
start = now;
p->unmapBuf();
allocLatency.addSample(SteadyClock::now() - start, monitor::TagSet{{"instance", "free"}, {"uid", uids}});
mapTimesCount.addSample(1, monitor::TagSet{{"instance", "free"}, {"uid", uids}});
mapBytesDist.addSample(p->size, monitor::TagSet{{"instance", "free"}, {"uid", uids}});
shmSizeCount.addSample(-p->size);
delete p;
});
} catch (const std::runtime_error &e) {
return makeError(ClientAgentCode::kIovShmFail, std::string("failed to open/map shm for iov ") + e.what());
}
allocLatency.addSample(SteadyClock::now() - start, monitor::TagSet{{"instance", "alloc"}, {"uid", uids}});
mapTimesCount.addSample(1, monitor::TagSet{{"instance", "alloc"}, {"uid", uids}});
mapBytesDist.addSample(shm->size, monitor::TagSet{{"instance", "alloc"}, {"uid", uids}});
shmSizeCount.addSample(shm->size, monitor::TagSet{{"uid", uids}});
// Record ownership and the attributes decoded from the key on the buffer.
shm->key = key;
shm->user = ui.uid;
shm->pid = pid;
shm->isIoRing = iovaRes->isIoRing;
shm->forRead = iovaRes->forRead;
shm->ioDepth = iovaRes->ioDepth;
shm->iora = iovaRes->iora;
// the idx should be reserved by us
iovs->table[iovd].store(shm);
start = SteadyClock::now();
auto recordMetrics = [blockSize = shm->blockSize, start, uids]() mutable {
ibRegBytesDist.addSample(blockSize, monitor::TagSet{{"instance", "reg"}, {"uid", uids}});
ibRegLatency.addSample(SteadyClock::now() - start, monitor::TagSet{{"instance", "reg"}, {"uid", uids}});
};
if (!iovaRes->isIoRing) { // io ring bufs don't need to be registered for ib io
folly::coro::blockingWait(shm->registerForIO(exec, sc, recordMetrics));
}
// Index the descriptor by key and by buffer id.
{
std::unique_lock lock(iovdLock_);
iovds_[key] = iovd;
}
{
std::unique_lock lock(shmLock);
shmsById[iovaRes->id] = iovd;
}
auto statRes = statIov(iovd, ui);
RETURN_ON_ERROR(statRes);
dealloc = false;
return std::make_pair(*statRes, iovaRes->isIoRing ? shm : std::shared_ptr<lib::ShmBuf>());
}
}
/// Removes the iov registered under `key` for user `ui`.
/// Drops the key and buffer-id index entries, frees the table slot and returns the buffer
/// that was stored there. Fails with the lookupIov() error when the key is unknown or
/// belongs to another user.
Result<std::shared_ptr<lib::ShmBuf>> IovTable::rmIov(const char *key, const meta::UserInfo &ui) {
  auto inode = lookupIov(key, ui);
  RETURN_ON_ERROR(inode);

  {
    std::unique_lock guard(iovdLock_);
    iovds_.erase(key);
  }
  {
    // the key was parsed successfully when the iov was added
    auto attrs = parseKey(key);
    std::unique_lock guard(shmLock);
    shmsById.erase(attrs->id);
  }

  const int desc = *iovDesc(inode->id);
  auto removed = iovs->table[desc].load();
  iovs->remove(desc);
  return removed;
}
/// Builds the symlink inode for iov descriptor `iovd`.
/// Fails with kNotFound for a descriptor that is out of range or empty, and with
/// kNoPermission when the iov belongs to a user other than `ui`.
Result<meta::Inode> IovTable::statIov(int iovd, const meta::UserInfo &ui) {
  const bool outOfRange = iovd < 0 || iovd >= (int)iovs->table.size();
  if (outOfRange) {
    return makeError(MetaCode::kNotFound, "invalid iov desc");
  }

  auto buf = iovs->table[iovd].load();
  if (!buf) {
    auto msg = fmt::format("iov desc {} not found, next avail {}", iovd, iovs->slots.nextAvail.load());
    return makeError(MetaCode::kNotFound, msg);
  }

  if (buf->user != ui.uid) {
    XLOGF(ERR, "statting user {} iov belongs to {}", ui.uid, buf->user);
    return makeError(MetaCode::kNoPermission, "iov not for user");
  }

  // read-only for the owner; the link points at the shm file under /dev/shm
  meta::Acl acl{ui.uid, ui.gid, meta::Permission(0400)};
  meta::Symlink link{linkPref / buf->path};
  return meta::Inode{meta::InodeId::iov(iovd), meta::InodeData{link, acl}};
}
/// Resolves `key` to its iov descriptor and returns the inode from statIov().
/// Fails with kNotFound when the key is not registered.
Result<meta::Inode> IovTable::lookupIov(const char *key, const meta::UserInfo &ui) {
  std::optional<int> desc;
  {
    std::shared_lock guard(iovdLock_);
    if (auto pos = iovds_.find(key); pos != iovds_.end()) {
      desc = pos->second;
    }
  }
  if (!desc) {
    return makeError(MetaCode::kNotFound, std::string("iov key not found ") + key);
  }
  return statIov(*desc, ui);
}
/// Lists the entries of the iov directory as seen by user `ui`.
/// The first three entries are the per-priority submit semaphores, followed by one entry for
/// every iov owned by `ui`. The two returned vectors are parallel: the i-th inode belongs to
/// the i-th directory entry.
std::pair<std::shared_ptr<std::vector<meta::DirEntry>>, std::shared_ptr<std::vector<std::optional<meta::Inode>>>>
IovTable::listIovs(const meta::UserInfo &ui) {
  auto entries = std::make_shared<std::vector<meta::DirEntry>>();
  auto inodes = std::make_shared<std::vector<std::optional<meta::Inode>>>();

  const auto upper = iovs->slots.nextAvail.load();
  entries->reserve(upper + 3);
  inodes->reserve(upper + 3);

  // template entry; only the name changes between rows
  meta::DirEntry entry{meta::InodeId::iovDir(), ""};

  for (int prio = 0; prio <= 2; ++prio) {
    entry.name = IoRingTable::semName(prio);
    entries->emplace_back(entry);
    inodes->emplace_back(IoRingTable::lookupSem(prio));
  }

  meta::Acl acl{meta::Uid{ui.uid}, meta::Gid{ui.gid}, meta::Permission{0400}};
  for (int slot = 0; slot < upper; ++slot) {
    auto buf = iovs->table[slot].load();
    // skip free slots and iovs of other users
    if (!buf || buf->user != ui.uid) {
      continue;
    }
    entry.name = buf->key;
    entries->emplace_back(entry);
    inodes->emplace_back(
        meta::Inode{meta::InodeId{meta::InodeId::iov(slot)}, meta::InodeData{meta::Symlink{linkPref / buf->path}, acl}});
  }

  return std::make_pair(std::move(entries), std::move(inodes));
}
} // namespace hf3fs::fuse

39
src/fuse/IovTable.h Normal file
View File

@@ -0,0 +1,39 @@
#pragma once
#include <string>
#include "common/utils/AtomicSharedPtrTable.h"
#include "fbs/meta/Schema.h"
#include "lib/common/Shm.h"
namespace hf3fs::fuse {
// Table of shared-memory I/O buffers ("iovs") exposed by the FUSE mount.
// Each iov occupies one slot of `iovs` and is also indexed by its key and by its buffer id.
class IovTable {
public:
IovTable() = default;
// Stores the mount name and creates the slot table with `cap` entries.
void init(const Path &mount, int cap);
// Registers the shm file at shmPath under `key`; returns its inode and, for io rings, the buffer.
Result<std::pair<meta::Inode, std::shared_ptr<lib::ShmBuf>>> addIov(const char *key,
const Path &shmPath,
pid_t pid,
const meta::UserInfo &ui,
folly::Executor::KeepAlive<> exec,
storage::client::StorageClient &sc);
// Unregisters the iov stored under `key` and returns the buffer that was removed.
Result<std::shared_ptr<lib::ShmBuf>> rmIov(const char *key, const meta::UserInfo &ui);
// Looks `key` up and returns the iov's inode.
Result<meta::Inode> lookupIov(const char *key, const meta::UserInfo &ui);
// Maps an inode id to an iov descriptor; nullopt when the id is outside the iov id range.
std::optional<int> iovDesc(meta::InodeId iid);
// Returns the inode for an iov descriptor (the parameter is the descriptor, not a string key).
Result<meta::Inode> statIov(int key, const meta::UserInfo &ui);
public:
// Directory entries and matching inodes visible to `ui`, as two parallel vectors.
std::pair<std::shared_ptr<std::vector<meta::DirEntry>>, std::shared_ptr<std::vector<std::optional<meta::Inode>>>>
listIovs(const meta::UserInfo &ui);
public:
std::string mountName;
// Guards shmsById.
std::shared_mutex shmLock;
// Buffer id -> iov descriptor.
robin_hood::unordered_map<Uuid, int> shmsById;
// Slot table holding the buffers, indexed by iov descriptor.
std::unique_ptr<AtomicSharedPtrTable<lib::ShmBuf>> iovs;
private:
// Guards iovds_.
mutable std::shared_mutex iovdLock_;
// Key -> iov descriptor.
robin_hood::unordered_map<std::string, int> iovds_;
};
} // namespace hf3fs::fuse

275
src/fuse/PioV.cc Normal file
View File

@@ -0,0 +1,275 @@
#include "PioV.h"
namespace hf3fs::lib::agent {
/// Binds the executor to a storage client and to the caller's result vector, and fetches the
/// routing info from the storage client's mgmtd client. Missing routing info is reported
/// through a DFATAL log.
PioV::PioV(storage::client::StorageClient &storageClient, int chunkSizeLim, std::vector<ssize_t> &res)
    : storageClient_(storageClient),
      chunkSizeLim_(chunkSizeLim),
      res_(res) {
  auto routing = storageClient_.getMgmtdClient().getRoutingInfo();
  const bool missing = !routing || !routing->raw();
  XLOGF_IF(DFATAL, missing, "RoutingInfo not found");
  routingInfo_ = routing->raw();
}
/// Queues a read of `len` bytes at file offset `off` into `buf`.
///
/// The range is split by chunkIo() and one ReadIO is queued per piece, each tagged with `idx`
/// as its user context. A non-file inode is not an error of the call: res_[idx] is set to
/// -kNotFile and success is returned. Fails with kInvalidArg when writes are already queued
/// on this executor, or with whatever error chunkIo() reports.
hf3fs::Result<Void> PioV::addRead(size_t idx,
                                  const meta::Inode &inode,
                                  uint16_t track,
                                  off_t off,
                                  size_t len,
                                  void *buf,
                                  storage::client::IOBuffer &memh) {
  if (!wios_.empty()) {
    return makeError(StatusCode::kInvalidArg, "adding read to write operations");
  }
  if (!inode.isFile()) {
    res_[idx] = -static_cast<ssize_t>(MetaCode::kNotFile);
    return Void{};
  }
  if (rios_.empty()) {
    rios_.reserve(res_.size());
  }

  // bytes of the caller's buffer already covered by queued pieces
  size_t filled = 0;
  auto queuePiece = [this, &memh, &filled, idx, buf](storage::ChainId chain,
                                                     storage::ChunkId chunk,
                                                     uint32_t,
                                                     uint32_t chunkOff,
                                                     uint32_t chunkLen) {
    auto *dst = (uint8_t *)buf + filled;
    rios_.emplace_back(
        storageClient_.createReadIO(chain, chunk, chunkOff, chunkLen, dst, &memh, reinterpret_cast<void *>(idx)));
    filled += chunkLen;
  };
  RETURN_ON_ERROR(chunkIo(inode, track, off, len, queuePiece));
  return Void{};
}
/// Queues a write of `len` bytes from `buf` at file offset `off`.
///
/// The range is split by chunkIo() and one WriteIO is queued per piece, each tagged with
/// `idx` as its user context. potentialLens_ is raised to the end offset of the data queued
/// so far for this inode. A non-file inode is not an error of the call: res_[idx] is set to
/// -kNotFile and success is returned. Fails with kInvalidArg when reads are already queued on
/// this executor, or with whatever error chunkIo() reports.
hf3fs::Result<Void> PioV::addWrite(size_t idx,
                                   const meta::Inode &inode,
                                   uint16_t track,
                                   off_t off,
                                   size_t len,
                                   const void *buf,
                                   storage::client::IOBuffer &memh) {
  if (!rios_.empty()) {
    return makeError(StatusCode::kInvalidArg, "adding write to read operations");
  } else if (!inode.isFile()) {
    res_[idx] = -static_cast<ssize_t>(MetaCode::kNotFile);
    return Void{};
  }
  if (wios_.empty()) {
    wios_.reserve(res_.size());
  }
  // bytes of the caller's buffer already covered by queued pieces
  size_t bufOff = 0;
  RETURN_ON_ERROR(chunkIo(inode,
                          track,
                          off,
                          len,
                          [this, &inode, &memh, &bufOff, idx, buf, off](storage::ChainId chain,
                                                                        storage::ChunkId chunk,
                                                                        uint32_t chunkSize,
                                                                        uint32_t chunkOff,
                                                                        uint32_t chunkLen) {
                            wios_.emplace_back(storageClient_.createWriteIO(chain,
                                                                            chunk,
                                                                            chunkOff,
                                                                            chunkLen,
                                                                            chunkSize,
                                                                            (uint8_t *)buf + bufOff,
                                                                            &memh,
                                                                            reinterpret_cast<void *>(idx)));
                            bufOff += chunkLen;
                            // bufOff already includes this piece, so off + bufOff is the end
                            // of the written range; adding chunkLen again would overshoot.
                            potentialLens_[inode.id] = std::max(potentialLens_[inode.id], off + bufOff);
                          }));
  return Void{};
}
// Splits the file range [off, off + len) along chunk boundaries and hands every piece to
// `consumeChunk`.
//
// Each per-chunk piece is further cut into sub-pieces of at most `rcs` bytes, where rcs is
// the chunk size, capped by chunkSizeLim_ when that limit is non-zero.
// consumeChunk receives (chain id, chunk id, chunk size, offset inside the chunk, length).
// Returns the error of getChainId / getChunkId if either fails.
Result<Void> PioV::chunkIo(
const meta::Inode &inode,
uint16_t track,
off_t off,
size_t len,
std::function<void(storage::ChainId, storage::ChunkId, uint32_t, uint32_t, uint32_t)> &&consumeChunk) {
const auto &f = inode.asFile();
auto chunkSize = f.layout.chunkSize;
// Offset inside the first chunk; every later chunk starts at 0.
auto chunkOff = off % chunkSize;
auto rcs = chunkSizeLim_ ? std::min((size_t)chunkSizeLim_, chunkSize.u64()) : chunkSize.u64();
// lastL / l are the start / end of the current piece, relative to off. The loop bound is
// len + chunkSize so that the final piece (where l has been clamped to len) still runs.
for (size_t lastL = 0, l = std::min((size_t)(chunkSize - chunkOff), len); // l is within a chunk
l < len + chunkSize; // for the last chunk
lastL = l, l += chunkSize) {
l = std::min(l, len); // l is always growing longer
auto opOff = off + lastL;
auto chain = f.getChainId(inode, opOff, *routingInfo_, track);
RETURN_ON_ERROR(chain);
auto fchunk = f.getChunkId(inode.id, opOff);
RETURN_ON_ERROR(fchunk);
auto chunk = storage::ChunkId(*fchunk);
auto chunkLen = l - lastL;
// Emit the piece in sub-pieces of at most rcs bytes.
for (size_t co = 0; co < chunkLen; co += rcs) {
consumeChunk(*chain, chunk, chunkSize, chunkOff + co, std::min(rcs, chunkLen - co));
}
chunkOff = 0; // chunks other than first always starts from 0
}
return Void{};
}
/// Runs all queued reads as one batch; succeeds immediately when nothing is queued.
/// Must not be used on an executor that has writes or truncate ops queued.
CoTryTask<void> PioV::executeRead(const UserInfo &userInfo, const storage::client::ReadOptions &options) {
  assert(wios_.empty() && trops_.empty());
  if (!rios_.empty()) {
    co_return co_await storageClient_.batchRead(rios_, userInfo, options);
  }
  co_return Void{};
}
// Runs all queued writes as one batch; succeeds immediately when nothing is queued.
// If truncate ops are queued they run first. Writes that share a user context with a failed
// truncate op are dropped from the batch and that entry's result is set to the negated
// truncate error code.
CoTryTask<void> PioV::executeWrite(const UserInfo &userInfo, const storage::client::WriteOptions &options) {
assert(rios_.empty());
if (wios_.empty()) {
co_return Void{};
}
if (!trops_.empty()) {
std::vector<storage::client::TruncateChunkOp *> failed;
std::set<size_t> badWios;
auto r = co_await storageClient_.truncateChunks(trops_, userInfo, options, &failed);
CO_RETURN_ON_ERROR(r);
if (!failed.empty()) {
for (auto op : failed) {
// userCtx carries the index of the entry in res_.
res_[reinterpret_cast<size_t>(op->userCtx)] = -static_cast<ssize_t>(op->result.lengthInfo.error().code());
for (size_t i = 0; i < wios_.size(); ++i) {
if (wios_[i].userCtx == op->userCtx) {
badWios.insert(i);
}
}
}
// Rebuild the write list without the writes marked bad above.
std::vector<storage::client::WriteIO> wios2;
wios2.reserve(wios_.size() - badWios.size());
for (size_t i = 0; i < wios_.size(); ++i) {
if (badWios.find(i) == badWios.end()) {
auto &wio = wios_[i];
wios2.emplace_back(storageClient_.createWriteIO(wio.routingTarget.chainId,
wio.chunkId,
wio.offset,
wio.length,
wio.chunkSize,
wio.data,
wio.buffer,
wio.userCtx));
}
}
std::swap(wios_, wios2);
}
}
co_return co_await storageClient_.batchWrite(wios_, userInfo, options);
}
// Folds the per-chunk results of `ios` into one result per user request.
//
// Every io carries the index of its request in userCtx; res[index] receives either the number of bytes
// transferred for that request or a negated error code. ios of the same request are expected to be adjacent
// and in file order.
//
// A "hole" is a short (or, for reads, chunk-not-found) io that is followed, within the same request, by an io
// that transferred data.
//   - read && allowHoles: the bytes of the hole are zero-filled in the io buffers and counted as transferred.
//   - otherwise: the request fails with ClientAgentCode::kHoleInIoOutcome.
// A short tail that is never followed by data is treated as end of file and simply not counted.
//
// read:       true when `ios` are read IOs (enables the chunk-not-found and zero-fill handling)
// res:        result slots, indexed by request index; slots are accumulated into, not reset
// ios:        completed IOs (ReadIO or WriteIO container)
// allowHoles: whether holes inside a read are reported as zeros instead of an error
template <typename Io>
void concatIoRes(bool read, std::vector<ssize_t> &res, const Io &ios, bool allowHoles) {
  ssize_t lastIovIdx = -1;
  bool inHole = false;
  std::optional<size_t> holeIo;  // index of the io in which the current hole starts
  size_t holeOff = 0;            // offset of the hole inside ios[*holeIo]
  size_t holeSize = 0;           // bytes missing since the hole started
  ssize_t iovIdx = 0;
  for (size_t i = 0; i < ios.size(); ++i, lastIovIdx = iovIdx) {
    const auto &io = ios[i];
    iovIdx = reinterpret_cast<ssize_t>(io.userCtx);

    // Hole tracking is per request: forget everything as soon as a new request starts, no matter how its first
    // io ended. Otherwise a hole left open by the previous request would be zero-filled and counted here.
    if (lastIovIdx != iovIdx) {
      inHole = false;
      holeIo = std::nullopt;
    }

    uint32_t iolen = 0;
    if (io.result.lengthInfo) {
      iolen = *io.result.lengthInfo;
      // inHole implies that the previous io belongs to the same request (see the reset above).
      // A request that has already failed keeps its error code, so it is not touched here.
      if (iolen > 0 && inHole && res[iovIdx] >= 0) {
        // the front part of the data read from a chunk can never be part of a hole when anything is read from the
        // chunk. storage server promises that, or how can it tell us that it only reads into the buffer from the
        // middle? so the hole always ends at the last chunk end, and we can add it to res to calc the correct read
        // size. and if the hole is not a hole, but the eof, the prev res will be the no. of bytes read from the file
        const auto &lastIo = ios[i - 1];
        auto lastChunk = meta::ChunkId::unpack(lastIo.chunkId.data());
        auto chunk = meta::ChunkId::unpack(io.chunkId.data());
        XLOGF(ERR,
              "found hole when {}ing inode id {}, hole starts before chunk idx {} chain id {} got {}_B in chunk "
              "idx {} / {} chain id {} after hole iov idx {} last iov idx {} hole io idx {} off in first io {} size {}",
              read ? "read" : "writ",
              lastChunk.inode().u64(),
              lastChunk.chunk(),
              lastIo.routingTarget.chainId,
              iolen,
              chunk.inode().u64(),
              chunk.chunk(),
              io.routingTarget.chainId,
              iovIdx,
              lastIovIdx,
              *holeIo,
              holeOff,
              holeSize);
        if (read && allowHoles) {  // zerofill the hole we found
          auto &hio = ios[*holeIo];
          memset(hio.data + holeOff, 0, hio.length - holeOff);
          for (size_t j = *holeIo + 1; j < i; ++j) {
            memset(ios[j].data, 0, ios[j].length);
          }
          res[iovIdx] += holeSize;
          inHole = false;  // out of hole now, but we may begin a new hole
          holeIo = std::nullopt;
        } else {
          res[iovIdx] = -static_cast<ssize_t>(ClientAgentCode::kHoleInIoOutcome);
        }
      }
    } else if (read && io.result.lengthInfo.error().code() == StorageClientCode::kChunkNotFound) {
      // a missing chunk reads as zero bytes: either a hole or the end of the file
    } else {
      if (res[iovIdx] >= 0) {  // keep the first error of a request
        res[iovIdx] = -static_cast<ssize_t>(io.result.lengthInfo.error().code());
      }
    }

    if (res[iovIdx] < 0) {
      continue;
    }

    if (iolen < io.length) {  // shorter than expected
      inHole = true;
      if (!holeIo) {
        holeIo = i;
        holeOff = iolen;
        holeSize = 0;
      }
      holeSize += io.length - iolen;
    }
    res[iovIdx] += iolen;
  }
}
// Folds the results of the executed IOs into res_.
// The batch is treated as a write batch when any write IO is queued, and as a read batch otherwise.
// allowHoles is honoured for reads only; writes never allow holes.
void PioV::finishIo(bool allowHoles) {
  if (!wios_.empty()) {
    concatIoRes(false, res_, wios_, false);
    return;
  }
  concatIoRes(true, res_, rios_, allowHoles);
}
} // namespace hf3fs::lib::agent

59
src/fuse/PioV.h Normal file
View File

@@ -0,0 +1,59 @@
#pragma once
#include <functional>
#include "client/meta/MetaClient.h"
#include "client/storage/StorageClient.h"
#include "common/utils/Result.h"
namespace hf3fs::lib::agent {
using flat::UserInfo;
// Collects the per-chunk storage IOs that make up a vector of user read or write requests, executes them as
// one batch and folds the per-chunk outcomes back into one result slot per request.
// A request is identified by the index `idx` given to addRead / addWrite; the same index selects its slot in
// the result vector handed to the constructor.
class PioV {
public:
// res is held by reference (see res_), so it has to outlive this object.
// NOTE(review): the constructor body is not part of this header; chunkSizeLim presumably ends up in
// chunkSizeLim_ (0 meaning "no limit", see chunkIo) - confirm against PioV.cc.
PioV(storage::client::StorageClient &storageClient, int chunkSizeLim, std::vector<ssize_t> &res);
// Queues the read of `len` bytes at file offset `off` of `inode` into `buf` as request `idx`.
// NOTE(review): presumably `buf` has to lie inside the registered buffer `memh` - confirm against PioV.cc.
hf3fs::Result<Void> addRead(size_t idx,
const meta::Inode &inode,
uint16_t track,
off_t off,
size_t len,
void *buf,
storage::client::IOBuffer &memh);
// if metaClient and userInfo are not nullptr,
// meta server will be contacted for latest file length if known length is shorter than off
CoTryTask<bool> checkWriteOff(size_t idx,
meta::client::MetaClient *metaClient,
const UserInfo *userInfo,
const meta::Inode &inode,
size_t off);
// Queues the write of `len` bytes from `buf` at file offset `off` of `inode` as request `idx`.
// One write IO is created per piece produced by chunkIo, each tagged with `idx`.
hf3fs::Result<Void> addWrite(size_t idx,
const meta::Inode &inode,
uint16_t track,
off_t off,
size_t len,
const void *buf,
storage::client::IOBuffer &memh);
// Sends all queued reads in one batch; does nothing when no read is queued.
// Must not be used once writes or truncations have been queued (asserted).
CoTryTask<void> executeRead(const UserInfo &userInfo,
const storage::client::ReadOptions &options = storage::client::ReadOptions());
// Runs the queued truncations, drops the writes of requests whose truncation failed (their result slot
// receives the negated error code), then sends the remaining writes in one batch.
// Does nothing when no write is queued. Must not be used once reads have been queued (asserted).
CoTryTask<void> executeWrite(const UserInfo &userInfo,
const storage::client::WriteOptions &options = storage::client::WriteOptions());
// Accumulates the outcome of the executed IOs into the result vector: bytes transferred, or a negated error
// code, per request. allowHoles only applies to reads; writes are always processed with holes disallowed.
void finishIo(bool allowHoles);
private:
// Splits [off, off + len) of `inode` along chunk boundaries and calls
// consumeChunk(chain id, chunk id, chunk size, offset inside the chunk, piece length) for every piece.
// Pieces are additionally capped at chunkSizeLim_ bytes when that limit is non-zero.
Result<Void> chunkIo(
const meta::Inode &inode,
uint16_t track,
off_t off,
size_t len,
std::function<void(storage::ChainId, storage::ChunkId, uint32_t, uint32_t, uint32_t)> &&consumeChunk);
private:
// client used to create and submit the storage IOs
storage::client::StorageClient &storageClient_;
// maximum length of a single storage IO; 0 disables the limit
int chunkSizeLim_;
// routing table dereferenced by chunkIo to resolve chain ids
std::shared_ptr<flat::RoutingInfo> routingInfo_;
// caller-owned result slots, indexed by request index
std::vector<ssize_t> &res_;
// queued read IOs, submitted by executeRead
std::vector<storage::client::ReadIO> rios_;
// queued write IOs, submitted by executeWrite
std::vector<storage::client::WriteIO> wios_;
// truncate ops executed by executeWrite ahead of the writes
// NOTE(review): where these are queued is not visible from this header - presumably checkWriteOff; confirm
std::vector<storage::client::TruncateChunkOp> trops_;
// per inode, the largest file length implied by the writes queued so far (maintained by addWrite)
std::map<meta::InodeId, size_t> potentialLens_;
};
} // namespace hf3fs::lib::agent

174
src/fuse/UserConfig.cc Normal file
View File

@@ -0,0 +1,174 @@
#include "UserConfig.h"
#include "fbs/meta/Common.h"
namespace hf3fs::fuse {
// Binds this object to the global fuse config.
// Allocates one per-user config slot for every uid in [0, config.max_uid()], caches the system-wide RDMA
// max_concurrent_transmission setting and installs an update callback on `config`. The callback refreshes the
// cached setting and rebuilds every existing per-user config as "new global config + that user's overrides".
// `config` is kept by pointer and captured by reference, so it has to outlive this object.
// NOTE(review): whatever addCallbackGuard returns is discarded here - if it is a guard whose destruction
// unregisters the callback, the callback would never fire; confirm.
void UserConfig::init(FuseConfig &config) {
  config_ = &config;
  configs_ = std::make_unique<AtomicSharedPtrTable<LocalConfig>>(config.max_uid() + 1);
  storageMaxConcXmit_ = config.storage().net_client().rdma_control().max_concurrent_transmission();

  config.addCallbackGuard([this, &config = config] {
    storageMaxConcXmit_ = config.storage().net_client().rdma_control().max_concurrent_transmission();

    std::lock_guard usersLock(userMtx_);
    for (auto uid : users_) {
      auto local = configs_->table[uid.toUnderType()].load();
      if (local) {
        // start from the fresh global config, then replay what this user has customized
        FuseConfig rebuilt = config;
        std::lock_guard localLock(local->mtx);
        rebuilt.atomicallyUpdate(local->updatedItems, true);
        local->config = std::move(rebuilt);
      }
    }
  });
}
// Resolves a prefixed config key.
// "sys.<name>" is looked up in systemKeys, "usr.<name>" in userKeys.
// Returns (true for a system key / false for a user key, index of <name> in the matching list), or
// kInvalidArg when the prefix is missing or <name> is not in the list.
Result<std::pair<bool, int>> UserConfig::parseKey(const char *key) {
  const bool sysPrefixed = strncmp(key, "sys.", 4) == 0;
  const bool usrPrefixed = !sysPrefixed && strncmp(key, "usr.", 4) == 0;
  if (!sysPrefixed && !usrPrefixed) {
    return makeError(StatusCode::kInvalidArg, fmt::format("key {} has to be prefixed with 'sys.' or 'usr.'", key));
  }

  const auto &candidates = sysPrefixed ? systemKeys : userKeys;
  auto pos = std::find(candidates.begin(), candidates.end(), key + 4);
  if (pos != candidates.end()) {
    return std::make_pair(sysPrefixed, pos - candidates.begin());
  }

  if (sysPrefixed) {
    return makeError(StatusCode::kInvalidArg, fmt::format("no such system key or key not customizable {}", key));
  }
  return makeError(StatusCode::kInvalidArg, fmt::format("no such user key or key not customizable {}", key));
}
// Sets the config item named by `key` ("sys.<name>" or "usr.<name>") to `val`.
//
// System keys are applied to the global config. The RDMA max_concurrent_transmission value is additionally
// required to be in (0, 2 * cached system setting].
// User keys are applied to the calling user's private copy of the config, which is created from the global
// config on first use; the override is also recorded so that it can be replayed when the global config changes.
// 'usr.readonly' cannot be set to anything but "true" while the global config is readonly.
//
// Returns a symlink inode (target `val`, owned by the caller, mode 0400) that represents the new setting, or
//   - the error of parseKey for an unknown or unprefixed key,
//   - kInvalidArg for a rejected value,
//   - kNoPermission when the caller's uid does not fit into the per-user table,
//   - the error of atomicallyUpdate when the config rejects the update.
Result<meta::Inode> UserConfig::setConfig(const char *key, const char *val, const meta::UserInfo &ui) {
  auto kres = parseKey(key);
  RETURN_ON_ERROR(kres);
  key += 4;  // skip the 'sys.' / 'usr.' prefix that parseKey has validated
  auto [isSys, kidx] = *kres;

  if (isSys) {
    if (!strcmp(key, "storage.net_client.rdma_control.max_concurrent_transmission")) {
      auto n = atoi(val);
      if (n <= 0 || n > 2 * storageMaxConcXmit_) {
        return makeError(
            StatusCode::kInvalidArg,
            fmt::format(
                "invalid value '{}' for key '{}', possible reason is it is larger than twice of system setting {}",
                val,
                key - 4,  // report the key as the caller spelled it, prefix included
                storageMaxConcXmit_.load()));
      }
    }
    RETURN_ON_ERROR(config_->atomicallyUpdate({std::make_pair(key, val)}, true));
    return meta::Inode{configIid(false, true, kidx),
                       {meta::Symlink{val}, meta::Acl{ui.uid, ui.gid, meta::Permission{0400}}}};
  }

  if (!strcmp(key, "readonly") && strcmp(val, "true") && config_->readonly()) {
    // if readonly is turned on cluster-wide, user cannot disable it locally
    return makeError(StatusCode::kInvalidArg, "cannot turn off readonly mode when it is turned on by the sys admin");
  }

  auto uid = ui.uid;
  std::lock_guard lock(userMtx_);
  auto uit = users_.find(uid);
  auto uidx = uid.toUnderType();
  if (uit == users_.end()) {
    // first customization by this user: give them a private copy of the global config
    if (uidx >= configs_->table.size()) {
      return makeError(MetaCode::kNoPermission, fmt::format("uid {} too large for user config", uid));
    }
    configs_->table[uidx].store(std::make_shared<LocalConfig>(*config_));
    users_.insert(uid);
  }

  auto lconf = configs_->table[uidx].load();
  auto kv = std::make_pair(key, val);
  auto res = lconf->config.atomicallyUpdate({kv}, true);
  RETURN_ON_ERROR(res);
  // remember the override only after it has been accepted
  lconf->updatedItems.emplace_back(std::move(kv));
  return meta::Inode{configIid(false, false, kidx), {meta::Symlink{val}, {uid, ui.gid, meta::Permission{0400}}}};
}
// Resolves a prefixed config key ("sys.<name>" / "usr.<name>") to the inode describing its current value.
// Fails with the error of parseKey when the key is unknown or not prefixed.
Result<meta::Inode> UserConfig::lookupConfig(const char *key, const meta::UserInfo &ui) {
  auto parsed = parseKey(key);
  RETURN_ON_ERROR(parsed);
  auto [sysKey, keyIdx] = *parsed;
  auto iid = configIid(true, sysKey, keyIdx);
  return statConfig(iid, ui);
}
// Returns the config that is in effect for the given user: the user's private copy when they have customized
// anything, the global config otherwise.
// userMtx_ is only held while the config is looked up, not while the caller uses the returned reference.
const FuseConfig &UserConfig::getConfig(const meta::UserInfo &ui) {
  auto uid = ui.uid;
  std::lock_guard lock(userMtx_);
  if (users_.find(uid) != users_.end()) {
    auto local = configs_->table[uid.toUnderType()].load();
    return local->config;
  }
  return *config_;
}
// Builds the inode for the config entry with inode id `iid`.
// Entry ids count down from InodeId::getConf() - 1: system keys first, user keys after them.
// The result is a symlink whose target is the current value of the key, owned by the caller; system keys are
// shown with mode 0444 and taken from the global config, user keys with mode 0400 and taken from the caller's
// effective config. Returns kNotFound when `iid` is outside the range of config entries.
Result<meta::Inode> UserConfig::statConfig(meta::InodeId iid, const meta::UserInfo &ui) {
  const auto sysCount = (int64_t)systemKeys.size();
  const auto allCount = (int64_t)(systemKeys.size() + userKeys.size());

  auto kidx = (int64_t)(meta::InodeId::getConf().u64() - 1 - iid.u64());
  if (kidx < 0 || kidx >= allCount) {
    return makeError(MetaCode::kNotFound, "iid not a config entry");
  }

  const bool isSys = kidx < sysCount;
  if (!isSys) {
    kidx -= sysCount;  // make the index relative to userKeys
  }

  // NOTE(review): `auto` makes this a copy of the whole config - confirm whether the snapshot is intended
  auto config = isSys ? *config_ : getConfig(ui);
  const auto &key = isSys ? systemKeys[kidx] : userKeys[kidx];
  return meta::Inode{iid,
                     {meta::Symlink{config.find(key).value()->toString()},
                      meta::Acl{ui.uid, ui.gid, meta::Permission{isSys ? 0444 : 0400}}}};
}
// Lists every customizable config key as a directory entry plus its inode.
// Entries are named "sys.<name>" for all system keys followed by "usr.<name>" for all user keys, in the order
// of systemKeys / userKeys; the inode at the same position describes the current value for the caller.
std::pair<std::shared_ptr<std::vector<meta::DirEntry>>, std::shared_ptr<std::vector<std::optional<meta::Inode>>>>
UserConfig::listConfig(const meta::UserInfo &ui) {
  const auto total = systemKeys.size() + userKeys.size();
  std::vector<meta::DirEntry> entries;
  std::vector<std::optional<meta::Inode>> inodes;
  entries.reserve(total);
  inodes.reserve(total);

  // all entries share the parent id; only the name differs
  meta::DirEntry entry{meta::InodeId::getConf(), ""};
  auto appendKeys = [&](const char *prefix, const std::vector<std::string> &keys) {
    for (const auto &name : keys) {
      auto fullKey = prefix + name;
      entry.name = fullKey;
      entries.emplace_back(entry);
      auto inode = *lookupConfig(fullKey.data(), ui);
      inodes.emplace_back(std::move(inode));
    }
  };
  appendKeys("sys.", systemKeys);
  appendKeys("usr.", userKeys);

  return std::make_pair(std::make_shared<std::vector<meta::DirEntry>>(std::move(entries)),
                        std::make_shared<std::vector<std::optional<meta::Inode>>>(std::move(inodes)));
}
} // namespace hf3fs::fuse

64
src/fuse/UserConfig.h Normal file
View File

@@ -0,0 +1,64 @@
#pragma once
#include "FuseConfig.h"
#include "common/utils/AtomicSharedPtrTable.h"
#include "fbs/meta/Common.h"
#include "fbs/meta/Schema.h"
namespace hf3fs::fuse {
// Exposes a fixed set of fuse config items as virtual entries that can be listed, read and set at runtime.
// "sys.<name>" items change the global config; "usr.<name>" items change a per-user copy of the config that
// only affects the user who set them.
// init() has to be called before any other member.
class UserConfig {
 public:
  UserConfig() = default;

  // Binds this object to the global config; `config` has to outlive this object.
  void init(FuseConfig &config);

  // Sets the item named by the prefixed `key` to `val` and returns an inode describing the new setting.
  Result<meta::Inode> setConfig(const char *key, const char *val, const meta::UserInfo &ui);
  // Returns the inode describing the current value of the item named by the prefixed `key`.
  Result<meta::Inode> lookupConfig(const char *key, const meta::UserInfo &ui);
  // Returns the inode describing the current value of the item with inode id `iid`.
  Result<meta::Inode> statConfig(meta::InodeId iid, const meta::UserInfo &ui);
  // Returns directory entries and inodes (same order) for all customizable items.
  std::pair<std::shared_ptr<std::vector<meta::DirEntry>>, std::shared_ptr<std::vector<std::optional<meta::Inode>>>>
  listConfig(const meta::UserInfo &ui);

 public:
  // Returns the user's private config when they have customized anything, the global config otherwise.
  const FuseConfig &getConfig(const meta::UserInfo &ui);

 public:
  // Items addressable as "sys.<name>". The position of a name determines its inode id, so the order matters.
  const std::vector<std::string> systemKeys{"storage.net_client.rdma_control.max_concurrent_transmission",
                                            "periodic_sync.enable",
                                            "periodic_sync.interval",
                                            "periodic_sync.flush_write_buf",
                                            "io_worker_coros.hi",
                                            "io_worker_coros.lo",
                                            "max_jobs_per_ioring",
                                            "io_job_deq_timeout"};
  // Items addressable as "usr.<name>". The position of a name determines its inode id, so the order matters.
  const std::vector<std::string> userKeys{"enable_read_cache",
                                          "readonly",
                                          "dryrun_bench_mode",
                                          "flush_on_stat",
                                          "sync_on_stat",
                                          "attr_timeout",
                                          "entry_timeout",
                                          "negative_timeout",
                                          "symlink_timeout"};

 private:
  // Splits a prefixed key into (is system key, index into systemKeys / userKeys).
  Result<std::pair<bool, int>> parseKey(const char *key);
  // Inode id of a config entry: ids count down from the getConf (isGet) or setConf base, system keys first,
  // user keys after them.
  meta::InodeId configIid(bool isGet, bool isSys, int kidx) {
    return meta::InodeId{(isGet ? meta::InodeId::getConf() : meta::InodeId::setConf()).u64() - 1 -
                         (isSys ? 0 : systemKeys.size()) - kidx};
  }

 private:
  // global config; null until init() has been called
  FuseConfig *config_ = nullptr;

  // Private config of one user.
  struct LocalConfig {
    explicit LocalConfig(const FuseConfig &globalConfig)
        : config(globalConfig) {}
    std::mutex mtx;                             // held while the config is rebuilt after a global update
    FuseConfig config;                          // global config with this user's overrides applied
    std::vector<config::KeyValue> updatedItems; // the overrides, in the order they were set
  };
  // per-user configs, indexed by uid; sized by init()
  std::unique_ptr<AtomicSharedPtrTable<LocalConfig>> configs_;
  // protects users_
  std::mutex userMtx_;
  // uids that own an entry in configs_
  std::set<meta::Uid> users_;

 private:
  // system-wide RDMA max_concurrent_transmission, cached to validate user supplied values; 0 until init()
  std::atomic<int> storageMaxConcXmit_{0};
};
} // namespace hf3fs::fuse

83
src/fuse/hf3fs_fuse.cpp Normal file
View File

@@ -0,0 +1,83 @@
#ifdef ENABLE_FUSE_APPLICATION
#include "FuseApplication.h"
int main(int argc, char *argv[]) {
gflags::AllowCommandLineReparsing();
using namespace hf3fs;
return fuse::FuseApplication().run(argc, argv);
}
#else
#include <folly/ScopeGuard.h>
#include "FuseConfig.h"
#include "FuseMainLoop.h"
#include "FuseOps.h"
#include "common/logging/LogInit.h"
using namespace hf3fs;
using namespace hf3fs::fuse;
DECLARE_string(cfg);
DECLARE_bool(use_local_cfg);
auto withRetry(auto &&f, std::string_view desc) {
using RetType = decltype(f());
auto retryInterval = std::chrono::milliseconds(10);
constexpr auto maxRetryInterval = std::chrono::milliseconds(1000);
std::optional<RetType> res;
for (int i = 0; i < 20; ++i) {
res = f();
if (*res) break;
XLOGF(CRITICAL, "{} failed: {}\nretryCount: {}", desc, res->error(), i);
std::this_thread::sleep_for(retryInterval);
retryInterval = std::min(2 * retryInterval, maxRetryInterval);
}
return *res;
}
// Entry point of the standalone build.
// Loads the config from the command line, brings up IB, logging and monitoring, initializes the fuse clients
// and then runs the fuse main loop until it returns. Any failure during start-up is fatal.
int main(int argc, char *argv[]) {
  // allow the command line to be parsed more than once
  gflags::AllowCommandLineReparsing();
  FuseConfig hf3fsConfig;
  hf3fsConfig.init(&argc, &argv);

  auto ibResult = net::IBManager::start(hf3fsConfig.ib_devices());
  XLOGF_IF(FATAL, !ibResult, "Failed to start IBManager: {}", ibResult.error());
  SCOPE_EXIT { hf3fs::net::IBManager::stop(); };

  auto logConfigStr = logging::generateLogConfig(hf3fsConfig.log(), String("hf3fs_fuse"));
  XLOGF(INFO, "LogConfig: {}", logConfigStr);
  logging::initOrDie(logConfigStr);
  XLOGF(INFO, "{}", VersionInfo::full());

  auto monitorResult = monitor::Monitor::start(hf3fsConfig.monitor());
  XLOGF_IF(FATAL, !monitorResult, "Start monitor failed: {}", monitorResult.error());

  auto physicalHostnameRes = SysResource::hostname(/*physicalMachineName=*/true);
  XLOGF_IF(FATAL, !physicalHostnameRes, "Get physical hostname failed: {}", physicalHostnameRes.error());
  auto containerHostnameRes = SysResource::hostname(/*physicalMachineName=*/false);
  XLOGF_IF(FATAL, !containerHostnameRes, "Get container hostname failed: {}", containerHostnameRes.error());

  // NOTE(review): clientId is not used anywhere below in this function - confirm whether it can be dropped
  auto clientId = ClientId::random(*physicalHostnameRes);

  flat::AppInfo appInfo;
  appInfo.clusterId = hf3fsConfig.cluster_id();
  appInfo.hostname = *physicalHostnameRes;
  appInfo.pid = SysResource::pid();
  appInfo.releaseVersion = flat::ReleaseVersion::fromVersionInfo();

  auto &d = getFuseClientsInstance();
  if (auto res = d.init(appInfo, hf3fsConfig.mountpoint(), hf3fsConfig.token_file(), hf3fsConfig); !res) {
    XLOGF(FATAL, "Init fuse clients failed: {}", res.error());
  }
  // registered after the IBManager guard, so the clients are stopped before IBManager
  SCOPE_EXIT { d.stop(); };

  return fuseMainLoop(argv[0],
                      hf3fsConfig.allow_other(),
                      hf3fsConfig.mountpoint(),
                      hf3fsConfig.io_bufs().max_buf_size(),
                      hf3fsConfig.cluster_id());
}
#endif