Merge branch 'deepseek-ai:main' into main

This commit is contained in:
zhaohaidao 2025-03-03 17:39:56 +08:00 committed by GitHub
commit b64cab3084
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
56 changed files with 518 additions and 129 deletions

View File

@ -21,9 +21,9 @@ jobs:
- name: Prepare
run: |
sudo apt install -y cmake libuv1-dev liblz4-dev liblzma-dev libdouble-conversion-dev libprocps-dev libdwarf-dev libunwind-dev
sudo apt install -y cmake libuv1-dev liblz4-dev liblzma-dev libdouble-conversion-dev libdwarf-dev libunwind-dev
sudo apt install -y libaio-dev libgflags-dev libgoogle-glog-dev libgtest-dev libgmock-dev clang-format-14 clang-14 clang-tidy-14 lld-14
sudo apt install -y libgoogle-perftools-dev google-perftools libssl-dev ccache gcc-12 g++-12 libboost-all-dev meson rustc cargo
sudo apt install -y libgoogle-perftools-dev google-perftools libssl-dev gcc-12 g++-12 libboost-all-dev meson rustc cargo
wget https://github.com/apple/foundationdb/releases/download/7.1.61/foundationdb-clients_7.1.61-1_amd64.deb && sudo dpkg -i foundationdb-clients_7.1.61-1_amd64.deb
git clone https://github.com/libfuse/libfuse.git libfuse -b fuse-3.16.2 --depth 1 && mkdir libfuse/build && cd libfuse/build && meson setup .. && ninja && sudo ninja install && cd ../.. && rm -rf libfuse
git submodule update --init --recursive

View File

@ -9,6 +9,7 @@ resolver = "2"
authors = ["dev <noreply@deepseek.com>"]
edition = "2021"
license = "MIT"
rust-version = "1.75.0" # MSRV
[profile.release-cmake]
debug = true

View File

@ -11,7 +11,7 @@ The Fire-Flyer File System (3FS) is a high-performance distributed file system d
- **File Interfaces** Develops stateless metadata services backed by a transactional key-value store (e.g., FoundationDB). The file interface is well known and used everywhere. There is no need to learn a new storage API.
- Diverse Workloads
- **Data Preparation** Organizes outputs of data analytics pipelines into hierarchical directory structures and manages large volume of intermediate outputs efficiently.
- **Data Preparation** Organizes outputs of data analytics pipelines into hierarchical directory structures and manages a large volume of intermediate outputs efficiently.
- **Dataloaders** Eliminates the need for prefetching or shuffling datasets by enabling random access to training samples across compute nodes.
- **Checkpointing** Supports high-throughput parallel checkpointing for large-scale training.
- **KVCache for Inference** Provides a cost-effective alternative to DRAM-based caching, offering high throughput and significantly larger capacity.
@ -31,6 +31,8 @@ The following figure demonstrates the throughput of read stress test on a large
![Large block read throughput under stress test on a 180-node cluster](docs/images/peak_throughput.jpg)
To benchmark 3FS, please use our [fio engine for USRBIO](benchmarks/fio_usrbio/README.md).
### 2. GraySort
We evaluated [smallpond](https://github.com/deepseek-ai/smallpond) using the GraySort benchmark, which measures sort performance on large-scale datasets. Our implementation adopts a two-phase approach: (1) partitioning data via shuffle using the prefix bits of keys, and (2) in-partition sorting. Both phases read/write data from/to 3FS.
@ -43,18 +45,18 @@ The test cluster comprised 25 storage nodes (2 NUMA domains/node, 1 storage serv
### 3. KVCache
KVCache is a technique used to optimize the LLM inference process. It avoids redundant computations by caching the key and value vectors of previous tokens in the decoder layers.
The top figure demonstrates the read throughput of all KVCache clients, highlighting both peak and average values, with peak throughput reaching up to 40 GiB/s. The bottom figure presents the IOPS of remove ops from garbage collection (GC) during the same time period.
The top figure demonstrates the read throughput of all KVCache clients (1×400Gbps NIC/node), highlighting both peak and average values, with peak throughput reaching up to 40 GiB/s. The bottom figure presents the IOPS of remove operations issued by garbage collection (GC) during the same time period.
![KVCache Read Throughput](./docs/images/kvcache_read_throughput.png)
![KVCache GC IOPS](./docs/images/kvcache_gc_iops.png)
## Check out source code
Clone 3FS repository from github:
Clone 3FS repository from GitHub:
git clone https://github.com/deepseek-ai/3fs
When `deepseek-ai/3fs` has been cloned to local file system, run the
When `deepseek-ai/3fs` has been cloned to a local file system, run the
following commands to check out the submodules:
```bash
@ -69,21 +71,21 @@ Install dependencies:
```bash
# for Ubuntu 20.04.
apt install cmake libuv1-dev liblz4-dev liblzma-dev libdouble-conversion-dev libprocps-dev libdwarf-dev libunwind-dev \
apt install cmake libuv1-dev liblz4-dev liblzma-dev libdouble-conversion-dev libdwarf-dev libunwind-dev \
libaio-dev libgflags-dev libgoogle-glog-dev libgtest-dev libgmock-dev clang-format-14 clang-14 clang-tidy-14 lld-14 \
libgoogle-perftools-dev google-perftools libssl-dev ccache libclang-rt-14-dev gcc-10 g++-10 libboost1.71-all-dev
libgoogle-perftools-dev google-perftools libssl-dev libclang-rt-14-dev gcc-10 g++-10 libboost1.71-all-dev
# for Ubuntu 22.04.
apt install cmake libuv1-dev liblz4-dev liblzma-dev libdouble-conversion-dev libprocps-dev libdwarf-dev libunwind-dev \
apt install cmake libuv1-dev liblz4-dev liblzma-dev libdouble-conversion-dev libdwarf-dev libunwind-dev \
libaio-dev libgflags-dev libgoogle-glog-dev libgtest-dev libgmock-dev clang-format-14 clang-14 clang-tidy-14 lld-14 \
libgoogle-perftools-dev google-perftools libssl-dev ccache gcc-12 g++-12 libboost-all-dev
libgoogle-perftools-dev google-perftools libssl-dev gcc-12 g++-12 libboost-all-dev
```
Install other build prerequisites:
- [`libfuse`](https://github.com/libfuse/libfuse/releases/tag/fuse-3.16.1) 3.16.1 or newer version
- [FoundationDB](https://apple.github.io/foundationdb/getting-started-linux.html) 7.1 or newer version
- [Rust](https://www.rust-lang.org/tools/install) toolchain
- [Rust](https://www.rust-lang.org/tools/install) toolchain: minimum 1.75.0, recommended 1.85.0 or newer (latest stable version)
## Build 3FS

View File

@ -0,0 +1,23 @@
# Makefile for the hf3fs_usrbio fio external ioengine plugin.
# Produces hf3fs_usrbio.so, loadable via fio's `ioengine=external:hf3fs_usrbio.so`.

# Directory containing hf3fs_usrbio.h (3FS repo default: 3FS/src/lib/api).
HF3FS_INCLUDE_DIR ?= /usr/include
# Directory containing libhf3fs_api_shared.so (3FS repo default: 3FS/build/src/lib/api).
HF3FS_LIB_DIR ?= /usr/lib
# Directory containing fio's generated config-host.h (root of a built fio tree).
FIO_SRC_DIR ?= /usr/include

PLUGIN_NAME = hf3fs_usrbio
SO_NAME = ${PLUGIN_NAME}.so
SRC = ${PLUGIN_NAME}.cpp
OBJ = ${PLUGIN_NAME}.o
CXX = g++
# -fPIC/-shared/-rdynamic: build a dlopen-able plugin; -include config-host.h
# mirrors how fio compiles its built-in engines.
CXXFLAGS = -fPIC -fpermissive -O3 -D_GNU_SOURCE -shared -rdynamic -I${HF3FS_INCLUDE_DIR} -I${FIO_SRC_DIR} -include config-host.h
# Link against the 3FS API library and bake its location into the runtime search path.
LDFLAGS = -L${HF3FS_LIB_DIR} -lhf3fs_api_shared -Wl,-rpath=${HF3FS_LIB_DIR}

.PHONY: all clean

all: ${SO_NAME}

# Compile and link straight from source to the shared object (no intermediate .o).
${SO_NAME}: ${SRC}
	${CXX} ${CXXFLAGS} $^ -o $@ ${LDFLAGS}

clean:
	rm -rf ${OBJ} ${SO_NAME}

View File

@ -0,0 +1,35 @@
# FIO engine for 3FS USRBIO
This repository contains the [fio] external plugin used for benchmarking [3FS] USRBIO.
## Build
First, build 3FS and fio.
Configure the following variables:
- `HF3FS_LIB_DIR`: directory containing `libhf3fs_api_shared.so`; the default path in the 3FS repo is `3FS/build/src/lib/api`.
- `HF3FS_INCLUDE_DIR`: directory containing `hf3fs_usrbio.h`; the default path in the 3FS repo is `3FS/src/lib/api`.
- `FIO_SRC_DIR`: directory containing `config-host.h`. After building fio, this header will be in the root of the fio repo.
Then run:
```
make HF3FS_LIB_DIR=${HF3FS_LIB_DIR} HF3FS_INCLUDE_DIR=${HF3FS_INCLUDE_DIR} FIO_SRC_DIR=${FIO_SRC_DIR}
```
You will get the external plugin as `hf3fs_usrbio.so`.
## Usage
To use this plugin, set the `ioengine` args in fio as `external:hf3fs_usrbio.so`. Please refer to [fio documentation] for further explanation.
To benchmark batched small I/Os, please set these four parameters to `batch_size` simultaneously:
```
iodepth=1024
iodepth_batch_submit=1024
iodepth_batch_complete_min=1024
iodepth_batch_complete_max=1024
```
[fio]: https://github.com/axboe/fio
[3FS]: https://github.com/deepseek-ai/3FS
[fio documentation]: https://fio.readthedocs.io/en/latest/fio_doc.html

View File

@ -0,0 +1,286 @@
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <unistd.h>
#include <chrono>
#include <iomanip>
#include <ctime>
#include "hf3fs_usrbio.h"
#include <atomic>
#include <errno.h>
#include <vector>
#include <sys/stat.h>
extern "C" {
#include "fio.h"
#include "optgroup.h"
}
// Engine-specific options parsed by fio from the job file (see `options[]` below).
struct hf3fs_usrbio_options {
  int dummy;         // placeholder; fio option offsets start after this field
  char *mountpoint;  // 3FS mount point, e.g. /hf3fs/mount/point
  int ior_depth;     // `ior_depth` passed to hf3fs_iorcreate*; 0 = library default
  int ior_timeout;   // wait timeout (ms) for the read io ring
};
// fio job-file option table for this engine; values land in hf3fs_usrbio_options
// via the offsets declared below. The table is NULL-name terminated.
static struct fio_option options[] = {
    {
        // 3FS mount point the benchmark files live under (required).
        .name = "mountpoint",
        .lname = "hf3fs mount point",
        .type = FIO_OPT_STR_STORE,
        .off1 = offsetof(struct hf3fs_usrbio_options, mountpoint),
        .help = "Mount point (e.g. /hf3fs/mount/point)",
        .def = "",
        .category = FIO_OPT_C_ENGINE,
        .group = FIO_OPT_G_NETIO,
    },
    {
        // Depth of the hf3fs io ring; forwarded to hf3fs_iorcreate*.
        .name = "ior_depth",
        .lname = "hf3fs ior depth",
        .type = FIO_OPT_INT,
        .off1 = offsetof(struct hf3fs_usrbio_options, ior_depth),
        .help = "Ior depth",
        .def = "0",
        .category = FIO_OPT_C_ENGINE,
        .group = FIO_OPT_G_NETIO,
    },
    {
        // Timeout (ms) used when creating the read io ring.
        .name = "ior_timeout",
        .lname = "hf3fs ior timeout (in ms)",
        .type = FIO_OPT_INT,
        .off1 = offsetof(struct hf3fs_usrbio_options, ior_timeout),
        .help = "Ior timeout",
        .def = "1",
        .category = FIO_OPT_C_ENGINE,
        .group = FIO_OPT_G_NETIO,
    },
    {
        // Sentinel terminating the option table.
        .name = NULL,
    },
};
// Track the last completed offset per file in fio's engine-private slot.
#define LAST_POS(f) ((f)->engine_pos)

// Per-thread engine state, hung off td->io_ops_data by hf3fs_usrbio_init().
struct hf3fs_usrbio_data {
  struct hf3fs_iov iov;    // registered shared-memory buffer backing all IO buffers
  struct hf3fs_ior ior_r;  // io ring used for reads
  struct hf3fs_ior ior_w;  // io ring used for writes
  std::vector<struct io_u *> io_us;  // io_us batched between queue() and commit()
  int queued;              // number of io_us currently in the batch
  int events;              // completions ready to be reported by getevents()
  enum fio_ddir last_ddir; // direction of the current batch (one direction per batch)
};
static int hf3fs_usrbio_init(struct thread_data *td) {
td->io_ops_data = static_cast<void *>(new hf3fs_usrbio_data);
struct hf3fs_usrbio_options *options = td->eo;
auto &ior_r = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->ior_r;
auto res = hf3fs_iorcreate3(&ior_r, options->mountpoint, td->o.iodepth, true, options->ior_depth, 0, options->ior_timeout, -1);
if (res < 0) {
return res;
}
auto &ior_w = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->ior_w;
res = hf3fs_iorcreate(&ior_w, options->mountpoint, td->o.iodepth, false, options->ior_depth, -1);
if (res < 0) {
return res;
}
auto *data = static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
data->io_us.resize(td->o.iodepth);
data->queued = 0;
data->events = 0;
return 0;
}
// Finalize one io_u after a synchronous transfer: advance the per-file
// position on success and translate the raw byte count into fio's
// completion state (short transfers become a residual, negatives an error).
static int fio_io_end(struct thread_data *td, struct io_u *io_u, int ret) {
  const bool data_moved = io_u->file != nullptr && ret >= 0 && ddir_rw(io_u->ddir);
  if (data_moved) {
    LAST_POS(io_u->file) = io_u->offset + ret;
  }
  if (ret == (int)io_u->xfer_buflen) {
    // Full transfer: fall through to the generic error check below.
  } else if (ret >= 0) {
    // Short transfer: report the residual bytes, not an error.
    io_u->resid = io_u->xfer_buflen - ret;
    io_u->error = 0;
    return FIO_Q_COMPLETED;
  } else {
    io_u->error = errno;
  }
  if (io_u->error) {
    io_u_log_error(td, io_u);
    td_verror(td, io_u->error, "xfer");
  }
  return FIO_Q_COMPLETED;
}
// Queue one io_u into the pending batch. Each batch holds IOs of a single
// direction: a direction change (or a full batch) returns BUSY so fio calls
// commit() to drain what is already queued.
static enum fio_q_status hf3fs_usrbio_queue(struct thread_data *td, struct io_u *io_u) {
  auto *sd = static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
  const bool same_dir = (io_u->ddir == sd->last_ddir);
  if (!same_dir && sd->queued != 0) {
    return FIO_Q_BUSY;  // flush the pending batch of the other direction first
  }
  if (same_dir && sd->queued == td->o.iodepth) {
    return FIO_Q_BUSY;  // batch is full
  }
  sd->last_ddir = io_u->ddir;
  sd->io_us[sd->queued++] = io_u;
  return FIO_Q_QUEUED;
}
static int hf3fs_usrbio_commit(struct thread_data *td) {
auto &vec = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->io_us;
auto *sd = static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
auto &ior_r = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->ior_r;
auto &ior_w = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->ior_w;
auto &iov = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->iov;
if (sd->queued == 0) {
return 0;
}
io_u_mark_submit(td, sd->queued);
int res = 0;
bool read = (sd->last_ddir == DDIR_READ);
auto &ior = read ? ior_r : ior_w;
for (int i = 0; i < sd->queued; i++) {
res = hf3fs_prep_io(&ior, &iov, read, vec[i]->xfer_buf, vec[i]->file->fd, vec[i]->offset, vec[i]->xfer_buflen, nullptr);
if (res < 0) {
std::cout << "prep " << res << " " << vec[i]->file->fd << std::endl;
return res;
}
}
res = hf3fs_submit_ios(&ior);
if (res < 0) {
std::cout << "submit " << res << std::endl;
return res;
}
std::vector<struct hf3fs_cqe> cqe(sd->queued);
res = hf3fs_wait_for_ios(&ior, cqe.data(), sd->queued, sd->queued, nullptr);
if (res < 0) {
std::cout << "wait " << res << std::endl;
return res;
}
for (int i = 0; i < sd->queued; i++) {
if (cqe[i].result < 0) {
std::cout << "cqe error " << res << std::endl;
return res;
}
}
sd->events = sd->queued;
sd->queued = 0;
return 0;
}
// Report the number of IOs completed by the last commit(). Completions are
// produced synchronously in commit(), so this simply drains the counter
// published there; `max` and `t` are part of the fio callback contract but
// unused here. Fix: dropped the unused local alias of sd->io_us.
static int hf3fs_usrbio_getevents(struct thread_data *td, unsigned int min, unsigned int max, const struct timespec fio_unused *t) {
  auto *sd = static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
  int ret = 0;
  if (min) {
    ret = sd->events;
    sd->events = 0;
  }
  return ret;
}
// Map a completion index from getevents() back to the io_u queued at that slot.
static struct io_u *hf3fs_usrbio_event(struct thread_data *td, int event) {
  auto *sd = static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
  return sd->io_us[event];
}
static void hf3fs_usrbio_cleanup(struct thread_data *td) {
delete static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
}
static int hf3fs_usrbio_open(struct thread_data *td, struct fio_file *f) {
int flags = 0;
if (td_write(td)) {
if (!read_only) {
flags = O_RDWR;
}
} else if (td_read(td)) {
if (!read_only) {
flags = O_RDWR;
} else {
flags = O_RDONLY;
}
}
f->fd = open(f->file_name, flags);
hf3fs_reg_fd(f->fd, 0);
td->o.open_files++;
return 0;
}
// Deregister the fd from 3FS, close it, and mark the file as closed.
static int hf3fs_usrbio_close(struct thread_data *td, struct fio_file *f) {
  const int fd = f->fd;
  hf3fs_dereg_fd(fd);
  close(fd);
  f->fd = -1;
  return 0;
}
// Allocate fio's IO memory as a 3FS shared-memory iov so every buffer is
// registered for zero-copy USRBIO transfers; fio slices td->orig_buffer into
// per-io_u buffers. Returns 0 on success or the negative hf3fs error code.
// Fix: cast td->eo explicitly instead of relying on -fpermissive.
static int hf3fs_usrbio_alloc(struct thread_data *td, size_t total_mem) {
  auto *options = static_cast<struct hf3fs_usrbio_options *>(td->eo);
  auto &iov = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->iov;
  auto res = hf3fs_iovcreate(&iov, options->mountpoint, total_mem, 0, -1);
  if (res < 0) {
    return res;
  }
  td->orig_buffer = iov.base;
  return 0;
}
// Release the shared-memory iov created in hf3fs_usrbio_alloc().
static void hf3fs_usrbio_free(struct thread_data *td) {
  auto *sd = static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
  hf3fs_iovdestroy(&sd->iov);
}
// Nothing to invalidate: USRBIO transfers bypass the kernel page cache.
static int hf3fs_invalidate(struct thread_data *td, struct fio_file *f) {
  return 0;
}
extern "C" {
static struct ioengine_ops ioengine;
// Entry point fio resolves via dlsym() for external ioengines: populate the
// ioengine_ops vtable and hand back a pointer to it.
// Fix: `ioengine.name = "hf3fs_usrbio",` ended with a comma operator that
// chained into the next statement; use a semicolon like every other line.
void get_ioengine(struct ioengine_ops **ioengine_ptr) {
  *ioengine_ptr = &ioengine;
  ioengine.name = "hf3fs_usrbio";
  ioengine.version = FIO_IOOPS_VERSION;
  // FIO_SYNCIO: IOs complete synchronously in commit(); FIO_NODISKUTIL: no
  // kernel block-device utilization stats to collect.
  ioengine.flags = FIO_SYNCIO | FIO_NODISKUTIL;
  ioengine.init = hf3fs_usrbio_init;
  ioengine.queue = hf3fs_usrbio_queue;
  ioengine.commit = hf3fs_usrbio_commit;
  ioengine.getevents = hf3fs_usrbio_getevents;
  ioengine.event = hf3fs_usrbio_event;
  ioengine.cleanup = hf3fs_usrbio_cleanup;
  ioengine.open_file = hf3fs_usrbio_open;
  ioengine.close_file = hf3fs_usrbio_close;
  ioengine.invalidate = hf3fs_invalidate;
  ioengine.get_file_size = generic_get_file_size;
  ioengine.iomem_alloc = hf3fs_usrbio_alloc;
  ioengine.iomem_free = hf3fs_usrbio_free;
  ioengine.option_struct_size = sizeof(struct hf3fs_usrbio_options);
  ioengine.options = options;
}
}

View File

@ -124,7 +124,7 @@ bool runBenchmarks() {
auto nodeIdStr = nodeEndpointStrs[0];
auto endpointStr = nodeEndpointStrs[1];
NodeId nodeId{std::stoul(nodeIdStr)};
auto nodeId = (NodeId)std::stoul(nodeIdStr);
auto endpoint = net::Address::fromString(endpointStr);
storageEndpoints[nodeId] = endpoint;
XLOGF(WARN, "Add storage endpoint: {} @ {}", nodeId, endpoint);

View File

@ -2,6 +2,7 @@
#include <boost/algorithm/string.hpp>
#include <boost/core/ignore_unused.hpp>
#include <boost/filesystem/string_file.hpp>
#include <common/utils/UtcTime.h>
#include <folly/experimental/coro/Collect.h>
#include <folly/futures/Barrier.h>

View File

@ -267,10 +267,11 @@ Install `storage` service on **storage** node.
```bash
/opt/3fs/bin/admin_cli -cfg /opt/3fs/etc/admin_cli.toml --config.mgmtd_client.mgmtd_server_addresses '["RDMA://192.168.1.1:8000"]' "user-add --root --admin 0 root"
```
Save the admin token to `/opt/3fs/etc/token.txt`.
The admin token is printed to the console, save it to `/opt/3fs/etc/token.txt`.
2. Generate `admin_cli` commands to create storage targets on 5 storage nodes (16 SSD per node, 6 targets per SSD).
- Follow instructions at [here](data_placement/README.md) to install Python packages.
```bash
pip install -r ~/3fs/deploy/data_placement/requirements.txt
python ~/3fs/deploy/data_placement/src/model/data_placement.py \
-ql -relax -type CR --num_nodes 5 --replication_factor 3 --min_targets_per_disk 6
python ~/3fs/deploy/data_placement/src/setup/gen_chain_table.py \

View File

@ -6,9 +6,21 @@ Suppose we are going to setup a small 3FS cluster:
- 16 SSDs attached to each node
- 6 storage targets on each SSD
## Install dependencies
The data placement problem is formulated using [Pyomo](https://www.pyomo.org/) and solved with the [HiGHS](https://highs.dev/) solver. Install them and other dependencies:
```bash
$ cd deploy/data_placement
$ pip install -r requirements.txt
```
## Generate chain table
First generate a solution of the data placement problem.
```bash
$ cd deploy/data_placement
$ python src/model/data_placement.py -ql -relax -type CR --num_nodes 5 --replication_factor 3 --min_targets_per_disk 6 --init_timelimit 600
...
@ -45,7 +57,7 @@ $ python src/model/data_placement.py -ql -relax -type CR --num_nodes 5 --replica
Note that some combinations of `--num_nodes` and `--replication_factor` may have no solution.
Then generate commands to create/remove storage targets.
Then generate commands to create/remove storage targets and the chain table.
```bash
$ python src/setup/gen_chain_table.py --chain_table_type CR --node_id_begin 10001 --node_id_end 10005 --num_disks_per_node 16 --num_targets_per_disk 6 --incidence_matrix_path output/DataPlacementModel-v_5-b_10-r_6-k_3-λ_2-lb_1-ub_1/incidence_matrix.pickle

View File

@ -2,7 +2,7 @@
## Design and implementation
The 3FS system has four components: cluster manager, metadata service, storage service and client. All components are connected in a RDMA network (InfiniBand or RoCE).
The 3FS system has four components: cluster manager, metadata service, storage service and client. All components are connected in an RDMA network (InfiniBand or RoCE).
Metadata and storage services send heartbeats to cluster manager. Cluster manager handles membership changes and distributes cluster configuration to other services and clients. Multiple cluster managers are deployed and one of them is elected as the primary. Another manager is promoted as primary when the primary fails. Cluster configuration is typically stored in a reliable distributed coordination service, such as ZooKeeper or etcd. In our production environment, we use the same key-value store as file metadata to reduce dependencies.
@ -245,7 +245,7 @@ When a previously offline storage service starts:
When a storage service finds a previously offline successor is online:
1. The service starts to forward normal write requests to the successor. Clients may only update a portion of the chunk, but the forwarded write requests should contains the whole chunk, i.e. a full-chunk-replace write.
1. The service starts to forward normal write requests to the successor. Clients may only update a portion of the chunk, but the forwarded write requests should contain the whole chunk, i.e. a full-chunk-replace write.
2. The service sends a dump-chunkmeta request to the successor. Once the metadata of all chunks on the successor target are received, it collects the chunk metadata on its local target. Then it compares the two copies of chunk metadata to decide which chunks should be transferred.
@ -277,7 +277,7 @@ File chunks are stored in the chunk engine. On each SSD, the persistent storage
1. *open/close* Initializes the engine by loading metadata from RocksDB and reconstructing chunk allocator states.
2. get: Retrieves chunk metadata and reference-counted handle through a hashmap cache, enabling concurrent access with O(1) average complexity.
2. *get* Retrieves chunk metadata and reference-counted handle through a hashmap cache, enabling concurrent access with O(1) average complexity.
3. *update* Implements copy-on-write (COW) semantics by allocating new chunks before modifying data. Old chunks remain readable until all handles are released.

View File

@ -2,7 +2,7 @@
## Build prerequisites
Follow the [offcial guide](https://p-org.github.io/P/getstarted/install/) to install the [P](https://github.com/p-org/P) framework.
Follow the [official guide](https://p-org.github.io/P/getstarted/install/) to install the [P](https://github.com/p-org/P) framework.
Or if `dotnet` has been installed, run the following command to store the `p` command.
```

View File

@ -1,6 +1,7 @@
#include "DumpChainTable.h"
#include <folly/Conv.h>
#include <fstream>
#include "AdminEnv.h"
#include "client/cli/common/Dispatcher.h"

View File

@ -1,6 +1,7 @@
#include "DumpChains.h"
#include <folly/Conv.h>
#include <fstream>
#include "AdminEnv.h"
#include "client/cli/common/Dispatcher.h"

View File

@ -1,6 +1,7 @@
#include "DumpChunkMeta.h"
#include <folly/logging/xlog.h>
#include <fstream>
#include <vector>
#include "AdminEnv.h"

View File

@ -8,6 +8,7 @@
#include <folly/experimental/coro/Invoke.h>
#include <folly/futures/Future.h>
#include <folly/logging/xlog.h>
#include <fstream>
#include <memory>
#include <utility>
#include <vector>
@ -338,4 +339,4 @@ CoTryTask<void> registerDumpInodesHandler(Dispatcher &dispatcher) {
co_return co_await dispatcher.registerHandler(getParser, dumpInodes);
}
} // namespace hf3fs::client::cli
} // namespace hf3fs::client::cli

View File

@ -1,6 +1,7 @@
#include "RenderConfig.h"
#include <folly/Conv.h>
#include <fstream>
#include "AdminEnv.h"
#include "client/cli/common/Dispatcher.h"

View File

@ -139,7 +139,7 @@ template <>
struct formatter<hf3fs::meta::client::ServerSelectionStrategy::NodeInfo> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::meta::client::ServerSelectionStrategy::NodeInfo &node, FormatContext &ctx) const {
return format_to(ctx.out(), "[{}]@{}.{}", node.address, node.nodeId, node.hostname);
return fmt::format_to(ctx.out(), "[{}]@{}.{}", node.address, node.nodeId, node.hostname);
}
};

View File

@ -578,15 +578,15 @@ template <>
struct formatter<hf3fs::storage::client::RoutingTarget> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::storage::client::RoutingTarget &routingTarget, FormatContext &ctx) const {
return format_to(ctx.out(),
"{}@{}@{}:{}@{}:{}#{}",
routingTarget.chainId,
routingTarget.chainVer,
routingTarget.routingInfoVer,
routingTarget.targetInfo.targetId,
routingTarget.targetInfo.nodeId,
routingTarget.channel.id,
routingTarget.channel.seqnum);
return fmt::format_to(ctx.out(),
"{}@{}@{}:{}@{}:{}#{}",
routingTarget.chainId,
routingTarget.chainVer,
routingTarget.routingInfoVer,
routingTarget.targetInfo.targetId,
routingTarget.targetInfo.nodeId,
routingTarget.channel.id,
routingTarget.channel.seqnum);
}
};

View File

@ -1818,7 +1818,7 @@ CoTryTask<void> StorageClientImpl::batchWriteWithoutRetry(ClientRequestContext &
config_.traffic_control().write().max_batch_bytes(),
config_.traffic_control().write().random_shuffle_requests());
// create batch read request and communicate with storage service
// create batch write request and communicate with storage service
auto sendReq =
[&, this](size_t batchIndex, const NodeId &nodeId, const std::vector<WriteIO *> &batchIOs) -> CoTask<bool> {

View File

@ -12,8 +12,8 @@ static monitor::CountRecorder num_update_channels_total{"storage_client.num_upda
UpdateChannelAllocator::UpdateChannelAllocator(size_t numChannels)
: numChannels_(std::min(kMaxNumChannels, numChannels)) {
std::scoped_lock lock(availableChannelMutex_);
for (uint32_t id = numChannels_; id > 0; id--) {
availableChannelIds_.push(ChannelId{id});
for (auto id = (ChannelId)numChannels_; id > 0; id--) {
availableChannelIds_.push(id);
num_update_channels_total.addSample(1);
}
}

View File

@ -4,6 +4,7 @@ version = "0.1.0"
authors.workspace = true
edition.workspace = true
license.workspace = true
rust-version.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -598,7 +598,11 @@ struct Opt {
paths: Vec<PathBuf>,
/// Interval in seconds
#[structopt(short, long, help = "Scan interval (in seconds), exit after one scan if set to 0")]
#[structopt(
short,
long,
help = "Scan interval (in seconds), exit after one scan if set to 0"
)]
interval: u64,
#[structopt(long)]
@ -615,7 +619,11 @@ struct Opt {
#[structopt(long, default_value = "info", help = "Log level, default is info")]
log_level: Level,
#[structopt(long, default_value = "warn", help = "stdout log level, default is warn")]
#[structopt(
long,
default_value = "warn",
help = "stdout log level, default is warn"
)]
stdout_level: Level,
}

View File

@ -139,7 +139,7 @@ template <>
struct formatter<hf3fs::flat::ReleaseVersion> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::flat::ReleaseVersion val, FormatContext &ctx) const -> decltype(ctx.out()) {
return format_to(ctx.out(), "{}", val.toString());
return fmt::format_to(ctx.out(), "{}", val.toString());
}
};
@ -148,9 +148,9 @@ struct formatter<hf3fs::flat::TagPair> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::flat::TagPair val, FormatContext &ctx) const -> decltype(ctx.out()) {
if (val.value.empty()) {
return format_to(ctx.out(), "{}", val.key);
return fmt::format_to(ctx.out(), "{}", val.key);
}
return format_to(ctx.out(), "{}={}", val.key, val.value);
return fmt::format_to(ctx.out(), "{}={}", val.key, val.value);
}
};

View File

@ -51,7 +51,7 @@ template <>
struct formatter<hf3fs::ClientId> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::ClientId &clientId, FormatContext &ctx) const {
return format_to(ctx.out(), "ClientId({})@{}", clientId.uuid.toHexString(), clientId.hostname);
return fmt::format_to(ctx.out(), "ClientId({})@{}", clientId.uuid.toHexString(), clientId.hostname);
}
};

View File

@ -123,9 +123,10 @@ class MemTransaction : public IReadWriteTransaction {
if (canceled_) {
co_return makeError(TransactionCode::kCanceled, "Canceled transaction!");
}
if (offset + 10 > key.size()) {
co_return makeError(StatusCode::kInvalidArg,
fmt::format("setVersionstampedKey: {} + 10 > key.size {}", offset, key.size()));
if (offset + sizeof(kv::Versionstamp) > key.size()) {
co_return makeError(
StatusCode::kInvalidArg,
fmt::format("setVersionstampedKey: {} + sizeof(kv::Versionstamp) > key.size {}", offset, key.size()));
}
versionstampedChanges_.push_back(
mem::MemKV::VersionstampedKV::versionstampedKey(std::string(key), offset, std::string(value)));
@ -137,9 +138,10 @@ class MemTransaction : public IReadWriteTransaction {
if (canceled_) {
co_return makeError(TransactionCode::kCanceled, "Canceled transaction!");
}
if (offset + 10 > value.size()) {
co_return makeError(StatusCode::kInvalidArg,
fmt::format("setVersionstampedValue: {} + 10 > value.size {}", offset, value.size()));
if (offset + sizeof(kv::Versionstamp) > value.size()) {
co_return makeError(
StatusCode::kInvalidArg,
fmt::format("setVersionstampedValue: {} + sizeof(kv::Versionstamp) > value.size {}", offset, value.size()));
}
versionstampedChanges_.push_back(
mem::MemKV::VersionstampedKV::versionstampedValue(std::string(key), std::string(value), offset));

View File

@ -171,7 +171,7 @@ static_assert(serde::SerializableToBytes<IBConnectRsp> && serde::SerializableToJ
template <>
struct hf3fs::serde::SerdeMethod<ibv_gid> {
static constexpr std::string_view serdeTo(const ibv_gid &gid) {
static std::string_view serdeTo(const ibv_gid &gid) {
return std::string_view((const char *)&gid.raw[0], sizeof(ibv_gid::raw));
}
static Result<ibv_gid> serdeFrom(std::string_view s) {

View File

@ -1107,7 +1107,7 @@ IBSocket::Drainer::Ptr IBSocket::Drainer::create(IBSocket::Ptr socket, std::weak
IBSocket::Drainer::Drainer(IBSocket::Ptr socket, std::weak_ptr<IBSocketManager> manager)
: socket_(std::move(socket)),
manager_(manager) {
manager_(std::move(manager)) {
draining.addSample(1);
}

View File

@ -570,7 +570,7 @@ class IBSocketManager : public EventLoop::EventHandler, public std::enable_share
private:
friend class IBSocket::Drainer;
void remove(IBSocket::Drainer::Ptr drainer) { drainers_.lock()->erase(drainer); }
void remove(const IBSocket::Drainer::Ptr &drainer) { drainers_.lock()->erase(drainer); }
FdWrapper timer_;
folly::Synchronized<std::set<IBSocket::Drainer::Ptr>, std::mutex> drainers_;

View File

@ -587,7 +587,7 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) {
int rows = (plen + l->len + l->cols - 1) / l->cols; /* rows used by current buf. */
int rpos = (plen + l->oldpos + l->cols) / l->cols; /* cursor relative row. */
int rpos2; /* rpos after refresh. */
int col; /* colum position, zero-based. */
int col; /* column position, zero-based. */
int old_rows = l->oldrows;
int fd = l->ofd, j;
struct abuf ab;
@ -1331,4 +1331,4 @@ int linenoiseHistoryLoad(const char *filename) {
}
fclose(fp);
return 0;
}
}

View File

@ -37,9 +37,9 @@ struct formatter<hf3fs::OptionalFmt<T>> : formatter<T> {
template <typename FormatContext>
auto format(const hf3fs::OptionalFmt<T> &op, FormatContext &ctx) const {
if (op.val_.has_value()) {
return format_to(ctx.out(), "{}", *op.val_);
return fmt::format_to(ctx.out(), "{}", *op.val_);
} else {
return format_to(ctx.out(), "std::nullopt");
return fmt::format_to(ctx.out(), "std::nullopt");
}
}
};

View File

@ -151,7 +151,7 @@ template <typename T>
struct formatter<hf3fs::Result<T>> : formatter<T> {
template <typename FormatContext>
auto format(const hf3fs::Result<T> &result, FormatContext &ctx) const {
if (result.hasError()) return format_to(ctx.out(), "{}", result.error());
if (result.hasError()) return fmt::format_to(ctx.out(), "{}", result.error());
return formatter<T>::format(result.value(), ctx);
}
};

View File

@ -203,14 +203,6 @@ static Counts &counts() {
#define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0
#endif
// workaround missing "is_trivially_copyable" in g++ < 5.0
// See https://stackoverflow.com/a/31798726/48181
#if defined(__GNUC__) && __GNUC__ < 5
#define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
#else
#define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
#endif
// helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html
#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus
#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L
@ -1382,7 +1374,7 @@ class Table
: static_cast<size_t>(std::distance(mKeyVals, reinterpret_cast_no_cast_align_warning<Node *>(mInfo)));
}
void cloneData(const Table &o) { Cloner<Table, IsFlat && ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(Node)>()(o, *this); }
void cloneData(const Table &o) { Cloner<Table, IsFlat && std::is_trivially_copyable<Node>::value>()(o, *this); }
// inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized.
// @return True on success, false if something went wrong

View File

@ -8,8 +8,15 @@
#include <memory>
#include <string_view>
#include "folly/Portability.h"
namespace hf3fs {
#if !FOLLY_X64 && !FOLLY_AARCH64
#error "The platform must be 64bit!"
#endif
static_assert(std::endian::native == std::endian::little);
// `Status` imitates `abseil::Status` which contains:
// - code
// - (optional) message
@ -101,7 +108,6 @@ class [[nodiscard]] Status {
private:
static_assert(StatusCode::kOK == 0, "StatusCode::kOK must be 0!");
static_assert(__x86_64__, "The platform must be 64bit!");
static_assert(sizeof(status_code_t) == 2, "The width of status_code_t must be 16b");
static constexpr auto kPtrBits = 48u;
@ -161,9 +167,9 @@ struct formatter<hf3fs::Status> : formatter<hf3fs::status_code_t> {
auto format(const hf3fs::Status &status, FormatContext &ctx) const {
auto msg = status.message();
if (msg.empty()) {
return format_to(ctx.out(), "{}({})", hf3fs::StatusCode::toString(status.code()), status.code());
return fmt::format_to(ctx.out(), "{}({})", hf3fs::StatusCode::toString(status.code()), status.code());
}
return format_to(ctx.out(), "{}({}) {}", hf3fs::StatusCode::toString(status.code()), status.code(), msg);
return fmt::format_to(ctx.out(), "{}({}) {}", hf3fs::StatusCode::toString(status.code()), status.code(), msg);
}
};

View File

@ -109,9 +109,9 @@ template <hf3fs::StrongTyped T>
struct formatter<T> : formatter<typename T::UnderlyingType> {
template <typename FormatContext>
auto format(const T &strongTyped, FormatContext &ctx) const {
format_to(ctx.out(), "{}(", T::kTypeName);
fmt::format_to(ctx.out(), "{}(", T::kTypeName);
formatter<typename T::UnderlyingType>::format(strongTyped.toUnderType(), ctx);
return format_to(ctx.out(), ")");
return fmt::format_to(ctx.out(), ")");
}
};
@ -120,11 +120,11 @@ struct formatter<std::optional<T>> : formatter<typename T::UnderlyingType> {
template <typename FormatContext>
auto format(const std::optional<T> &strongTyped, FormatContext &ctx) const {
if (strongTyped.has_value()) {
format_to(ctx.out(), "{}(", T::kTypeName);
fmt::format_to(ctx.out(), "{}(", T::kTypeName);
formatter<typename T::UnderlyingType>::format(strongTyped->toUnderType(), ctx);
return format_to(ctx.out(), ")");
return fmt::format_to(ctx.out(), ")");
} else {
return format_to(ctx.out(), "{}(std::nullopt)", T::kTypeName);
return fmt::format_to(ctx.out(), "{}(std::nullopt)", T::kTypeName);
}
}
};

View File

@ -7,7 +7,7 @@ namespace hf3fs::serde {
template <>
struct SerdeMethod<UtcTime> {
static constexpr auto serdeTo(UtcTime t) { return t.toMicroseconds(); }
static auto serdeTo(UtcTime t) { return t.toMicroseconds(); }
static Result<UtcTime> serdeFrom(int64_t t) { return UtcTime::fromMicroseconds(t); }
};

View File

@ -1,5 +1,6 @@
#pragma once
#include <cstdint>
#include <string_view>
namespace hf3fs {

View File

@ -80,11 +80,11 @@ struct formatter<hf3fs::flat::UserInfo> : formatter<std::string_view> {
auto format(const hf3fs::flat::UserInfo &user, FormatContext &ctx) const {
auto groups = transformTo<std::vector>(std::span{user.groups.begin(), user.groups.size()},
[](hf3fs::flat::Gid gid) { return gid.toUnderType(); });
return format_to(ctx.out(),
"(uid {}, gid {}, groups ({}))",
user.uid.toUnderType(),
user.gid.toUnderType(),
fmt::join(groups.begin(), groups.end(), ","));
return fmt::format_to(ctx.out(),
"(uid {}, gid {}, groups ({}))",
user.uid.toUnderType(),
user.gid.toUnderType(),
fmt::join(groups.begin(), groups.end(), ","));
}
};

View File

@ -436,7 +436,7 @@ template <>
struct formatter<hf3fs::meta::InodeType> : formatter<std::string_view> {
template <typename FormatContext>
auto format(hf3fs::meta::InodeType type, FormatContext &ctx) const {
return format_to(ctx.out(), "{}", magic_enum::enum_name(type));
return fmt::format_to(ctx.out(), "{}", magic_enum::enum_name(type));
}
};
@ -444,7 +444,7 @@ template <>
struct formatter<hf3fs::meta::ChunkId> : formatter<std::string_view> {
template <typename FormatContext>
auto format(hf3fs::meta::ChunkId chunk, FormatContext &ctx) const {
return format_to(ctx.out(), "{}-{}-{}", chunk.inode(), chunk.track(), chunk.chunk());
return fmt::format_to(ctx.out(), "{}-{}-{}", chunk.inode(), chunk.track(), chunk.chunk());
}
};
@ -452,7 +452,7 @@ template <>
struct formatter<hf3fs::meta::Layout::ChunkSize> : formatter<std::string_view> {
template <typename FormatContext>
auto format(hf3fs::meta::Layout::ChunkSize chunk, FormatContext &ctx) const {
return format_to(ctx.out(), "{}", (uint64_t)chunk);
return fmt::format_to(ctx.out(), "{}", (uint64_t)chunk);
}
};
@ -460,7 +460,7 @@ template <>
struct formatter<hf3fs::meta::VersionedLength> : formatter<std::string_view> {
template <typename FormatContext>
auto format(hf3fs::meta::VersionedLength v, FormatContext &ctx) const {
return format_to(ctx.out(), "{}@{}", v.length, v.truncateVer);
return fmt::format_to(ctx.out(), "{}@{}", v.length, v.truncateVer);
}
};

View File

@ -42,7 +42,7 @@ struct formatter<hf3fs::flat::ChainRef> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::flat::ChainRef &ref, FormatContext &ctx) const {
auto [id, v, i] = ref.decode();
return format_to(ctx.out(), "ChainRef({}@{}-{})", id.toUnderType(), v.toUnderType(), i);
return fmt::format_to(ctx.out(), "ChainRef({}@{}-{})", id.toUnderType(), v.toUnderType(), i);
}
};

View File

@ -205,7 +205,7 @@ static_assert(serde::Serializable<ChecksumInfo>);
template <>
struct ::hf3fs::serde::SerdeMethod<::hf3fs::storage::ChunkId> {
static constexpr std::string_view serdeTo(const storage::ChunkId &chunkId) { return chunkId.data(); }
static std::string_view serdeTo(const storage::ChunkId &chunkId) { return chunkId.data(); }
static Result<storage::ChunkId> serdeFrom(std::string_view str) { return storage::ChunkId(str); }
static std::string serdeToReadable(const storage::ChunkId &chunkId) { return chunkId.describe(); };
static Result<storage::ChunkId> serdeFromReadable(std::string_view s) { return storage::ChunkId::fromString(s); }
@ -744,7 +744,7 @@ template <>
struct formatter<hf3fs::storage::ChunkId> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::storage::ChunkId &chunkId, FormatContext &ctx) const {
return format_to(ctx.out(), "ChunkId({})", chunkId.describe());
return fmt::format_to(ctx.out(), "ChunkId({})", chunkId.describe());
}
};
@ -753,13 +753,13 @@ struct formatter<hf3fs::storage::ChunkIdRange> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::storage::ChunkIdRange &range, FormatContext &ctx) const {
if (range.maxNumChunkIdsToProcess == 1) {
return format_to(ctx.out(), "{}", range.begin);
return fmt::format_to(ctx.out(), "{}", range.begin);
} else {
return format_to(ctx.out(),
"ChunkIdRange[{}, {}){{{}}}",
range.begin.describe(),
range.end.describe(),
range.maxNumChunkIdsToProcess);
return fmt::format_to(ctx.out(),
"ChunkIdRange[{}, {}){{{}}}",
range.begin.describe(),
range.end.describe(),
range.maxNumChunkIdsToProcess);
}
}
};
@ -768,7 +768,7 @@ template <>
struct formatter<hf3fs::storage::ChecksumInfo> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::storage::ChecksumInfo &checksum, FormatContext &ctx) const {
return format_to(ctx.out(), "{}#{:08X}", magic_enum::enum_name(checksum.type), ~checksum.value);
return fmt::format_to(ctx.out(), "{}#{:08X}", magic_enum::enum_name(checksum.type), ~checksum.value);
}
};
@ -776,13 +776,13 @@ template <>
struct formatter<hf3fs::storage::IOResult> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::storage::IOResult &result, FormatContext &ctx) const {
return format_to(ctx.out(),
"length{{{}}} version{{{}/{}}} checksum{{{}}} {{{}}}",
result.lengthInfo,
result.updateVer,
result.commitVer,
result.checksum,
result.commitChainVer);
return fmt::format_to(ctx.out(),
"length{{{}}} version{{{}/{}}} checksum{{{}}} {{{}}}",
result.lengthInfo,
result.updateVer,
result.commitVer,
result.checksum,
result.commitChainVer);
}
};
@ -790,7 +790,7 @@ template <>
struct formatter<hf3fs::storage::UpdateChannel> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::storage::UpdateChannel &channel, FormatContext &ctx) const {
return format_to(ctx.out(), "@{}#{}", channel.id, channel.seqnum);
return fmt::format_to(ctx.out(), "@{}#{}", channel.id, channel.seqnum);
}
};
@ -798,7 +798,7 @@ template <>
struct formatter<hf3fs::storage::MessageTag> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::storage::MessageTag &tag, FormatContext &ctx) const {
return format_to(ctx.out(), "@{}#{}:{}", tag.clientId, tag.requestId, tag.channel);
return fmt::format_to(ctx.out(), "@{}#{}:{}", tag.clientId, tag.requestId, tag.channel);
}
};

View File

@ -331,9 +331,10 @@ CoTryTask<void> FDBTransaction::set(std::string_view key, std::string_view value
}
CoTryTask<void> FDBTransaction::setVersionstampedKey(std::string_view key, uint32_t offset, std::string_view value) {
if (offset + 10 > key.size()) {
co_return makeError(StatusCode::kInvalidArg,
fmt::format("setVersionstampedKey: {} + 10 > key.size {}", offset, key.size()));
if (offset + sizeof(kv::Versionstamp) > key.size()) {
co_return makeError(
StatusCode::kInvalidArg,
fmt::format("setVersionstampedKey: {} + sizeof(kv::Versionstamp) > key.size {}", offset, key.size()));
}
co_return co_await OpWrapper<Op::SetVersionstampedKey>::run([&](Op) -> CoTryTask<void> {
tr_.atomicOp(appendOffset(key, offset), value, FDB_MUTATION_TYPE_SET_VERSIONSTAMPED_KEY);
@ -342,9 +343,10 @@ CoTryTask<void> FDBTransaction::setVersionstampedKey(std::string_view key, uint3
}
CoTryTask<void> FDBTransaction::setVersionstampedValue(std::string_view key, std::string_view value, uint32_t offset) {
if (offset + 10 > value.size()) {
co_return makeError(StatusCode::kInvalidArg,
fmt::format("setVersionstampedValue: {} + 10 > value.size {}", offset, value.size()));
if (offset + sizeof(kv::Versionstamp) > value.size()) {
co_return makeError(
StatusCode::kInvalidArg,
fmt::format("setVersionstampedValue: {} + sizeof(kv::Versionstamp) > value.size {}", offset, value.size()));
}
co_return co_await OpWrapper<Op::SetVersionstampedValue>::run([&](Op) -> CoTryTask<void> {
tr_.atomicOp(key, appendOffset(value, offset), FDB_MUTATION_TYPE_SET_VERSIONSTAMPED_VALUE);

View File

@ -12,6 +12,7 @@
#include <folly/Utility.h>
#include <folly/experimental/coro/BlockingWait.h>
#include <folly/logging/xlog.h>
#include <fstream>
#include <fuse3/fuse_lowlevel.h>
#include <iostream>
#include <linux/fs.h>

View File

@ -141,7 +141,7 @@ Result<meta::Inode> UserConfig::statConfig(meta::InodeId iid, const meta::UserIn
auto key = isSys ? systemKeys[kidx] : userKeys[kidx];
return meta::Inode{iid,
{meta::Symlink{config.find(key).value()->toString()},
meta::Acl{ui.uid, ui.gid, meta::Permission{isSys ? 0444 : 0400}}}};
meta::Acl{ui.uid, ui.gid, meta::Permission{isSys ? 0444u : 0400u}}}};
}
std::pair<std::shared_ptr<std::vector<meta::DirEntry>>, std::shared_ptr<std::vector<std::optional<meta::Inode>>>>

View File

@ -1,2 +1,2 @@
target_add_lib(hf3fs_api client-lib-common storage-client procps numa rt)
target_add_shared_lib(hf3fs_api_shared client-lib-common storage-client procps numa rt)
target_add_lib(hf3fs_api client-lib-common storage-client numa rt)
target_add_shared_lib(hf3fs_api_shared client-lib-common storage-client numa rt)

View File

@ -4,11 +4,11 @@
User Space Ring Based IO, or USRBIO, is a set of high-speed I/O functions on 3FS. User applications can directly submit I/O requests to the 3FS I/O queue in the FUSE process via the USRBIO API, thereby bypassing certain limitations inherent to FUSE itself. For example, this approach avoids the maximum single I/O size restriction, which is notoriously unfriendly to network file systems. It also makes the data exchange between the user and FUSE processes.
## Concepts
**Iov**: A large shared memory region for zero-copy read/write operations, shared between the user and FUSE processes, with InfiniBand (IB) memory registration managed by the FUSE process. In the USRBIO API, all read data will be read into Iov, and all write data should be writen to Iov by user first.
**Iov**: A large shared memory region for zero-copy read/write operations, shared between the user and FUSE processes, with InfiniBand (IB) memory registration managed by the FUSE process. In the USRBIO API, all read data will be read into Iov, and all write data should be written to Iov by user first.
**Ior**: A small shared memory ring for communication between user process and FUSE process. The usage of Ior is similar to Linux [io-uring](https://unixism.net/loti/index.html), where the user application enqueues read/write requests, and the FUSE process dequeues these requests for completion. The I/Os are executed in batches controlled by the `io_depth` parameter, and multiple batches will be executed in parallel, be they from different rings, or even from the same ring. However, multiple rings are still recommended for multi-threaded applications, as synchronization is unavoidable when sharing a ring.
**File descriptor Registration**: Functions are provided for file descriptor registration and deregistration. Only registered fds can be used for the USRBIO. The file descriptors in the user applicaiton are managed by the Linux kernel and the FUSE process has no way to know how they're actually associated with inode IDs it manages. The registration makes the I/O preparation function look more like the [uring counterpart](https://unixism.net/loti/ref-liburing/submission.html).
**File descriptor Registration**: Functions are provided for file descriptor registration and deregistration. Only registered fds can be used for the USRBIO. The file descriptors in the user application are managed by the Linux kernel and the FUSE process has no way to know how they're actually associated with inode IDs it manages. The registration makes the I/O preparation function look more like the [uring counterpart](https://unixism.net/loti/ref-liburing/submission.html).
## Functions
@ -131,7 +131,7 @@ int hf3fs_reg_fd(int fd, uint64_t flags);
### hf3fs_dereg_fd
#### Summary
Deegister a file descriptor.
Deregister a file descriptor.
#### Syntax
```c

View File

@ -134,11 +134,11 @@ template <>
struct formatter<hf3fs::meta::server::FileSession> : formatter<std::string_view> {
template <typename FormatContext>
auto format(const hf3fs::meta::server::FileSession &session, FormatContext &ctx) const {
return format_to(ctx.out(),
"{{inodeId {}, client {}, session {}}}",
session.inodeId,
session.clientId,
session.sessionId);
return fmt::format_to(ctx.out(),
"{{inodeId {}, client {}, session {}}}",
session.inodeId,
session.clientId,
session.sessionId);
}
};

View File

@ -2,6 +2,8 @@
name = "chunk_engine"
version = "0.1.11"
edition = "2021"
license.workspace = true
rust-version.workspace = true
[lib]
crate-type = ["lib", "staticlib"]

View File

@ -7,25 +7,27 @@ use std::sync::Arc;
use crate::*;
pub use ::cxx::CxxString;
fn create(path: &str, create: bool, prefix_len: usize, error: Pin<&mut CxxString>) -> Box<Engine> {
fn create(path: &str, create: bool, prefix_len: usize, error: Pin<&mut CxxString>) -> *mut Engine {
let config = EngineConfig {
path: PathBuf::from(path),
create,
prefix_len,
};
match Engine::open(&config) {
Ok(engine) => Box::new(engine),
Ok(engine) => Box::into_raw(Box::new(engine)),
Err(e) => {
error.push_str(&e.to_string());
unsafe { Box::from_raw(std::ptr::null_mut()) }
std::ptr::null_mut()
}
}
}
fn release(_engine: Box<Engine>) {}
#[allow(dead_code)]
struct LogGuard(tracing_appender::non_blocking::WorkerGuard);
fn init_log(path: &str, error: Pin<&mut CxxString>) -> Box<LogGuard> {
fn init_log(path: &str, error: Pin<&mut CxxString>) -> *mut LogGuard {
match rolling_file::BasicRollingFileAppender::new(
path,
rolling_file::RollingConditionBasic::new().max_size(Size::mebibyte(500).into()),
@ -38,11 +40,11 @@ fn init_log(path: &str, error: Pin<&mut CxxString>) -> Box<LogGuard> {
.with_writer(non_blocking)
.with_ansi(false)
.init();
Box::new(LogGuard(guard))
Box::into_raw(Box::new(LogGuard(guard)))
}
Err(e) => {
error.push_str(&e.to_string());
unsafe { Box::from_raw(std::ptr::null_mut()) }
std::ptr::null_mut()
}
}
}
@ -456,7 +458,8 @@ pub mod ffi {
create: bool,
prefix_len: usize,
error: Pin<&mut CxxString>,
) -> Box<Engine>;
) -> *mut Engine;
fn release(engine: Box<Engine>);
fn raw_used_size(&self) -> RawUsedSize;
fn allocate_groups(&self, min_remain: usize, max_remain: usize, batch_size: usize)
@ -546,7 +549,7 @@ pub mod ffi {
extern "Rust" {
type LogGuard;
fn init_log(path: &str, error: Pin<&mut CxxString>) -> Box<LogGuard>;
fn init_log(path: &str, error: Pin<&mut CxxString>) -> *mut LogGuard;
}
extern "Rust" {

View File

@ -2,6 +2,7 @@
#include <boost/filesystem/operations.hpp>
#include <folly/experimental/symbolizer/Symbolizer.h>
#include <fstream>
#include <sys/stat.h>
#include "common/monitor/Recorder.h"

View File

@ -60,7 +60,7 @@ Result<Void> StorageTargets::init(CPUExecutorGroup &executor) {
if (!error.empty()) {
co_return makeError(StorageCode::kStorageStatFailed, std::move(error));
}
co_return engine;
co_return rust::Box<chunk_engine::Engine>::from_raw(engine);
}).scheduleOn(&executor.pickNext()));
}
@ -115,7 +115,7 @@ Result<Void> StorageTargets::create(const CreateConfig &createConfig) {
targetConfig.only_chunk_engine = createConfig.only_chunk_engine();
RETURN_AND_LOG_ON_ERROR(storageTarget->create(targetConfig));
++idx;
RETURN_AND_LOG_ON_ERROR(targetMap_.addStorageTarget(storageTarget));
RETURN_AND_LOG_ON_ERROR(targetMap_.addStorageTarget(std::move(storageTarget)));
}
return Void{};
}
@ -189,7 +189,7 @@ Result<Void> StorageTargets::create(const CreateTargetReq &req) {
targetConfig.only_chunk_engine = req.onlyChunkEngine;
RETURN_AND_LOG_ON_ERROR(storageTarget->create(targetConfig));
XLOGF(INFO, "Create storage target {} at {}", storageTarget->targetId(), targetPath.string());
RETURN_AND_LOG_ON_ERROR(targetMap_.addStorageTarget(storageTarget));
RETURN_AND_LOG_ON_ERROR(targetMap_.addStorageTarget(std::move(storageTarget)));
return Void{};
}
@ -242,7 +242,7 @@ Result<Void> StorageTargets::loadTarget(const Path &targetPath) {
XLOG(ERR, msg);
return makeError(StorageCode::kStorageInitFailed, std::move(msg));
}
RETURN_AND_LOG_ON_ERROR(targetMap_.addStorageTarget(storageTarget));
RETURN_AND_LOG_ON_ERROR(targetMap_.addStorageTarget(std::move(storageTarget)));
return Void{};
}

View File

@ -1,5 +1,7 @@
#include "storage/worker/CheckWorker.h"
#include <fstream>
#include "common/monitor/Recorder.h"
#include "common/utils/Duration.h"
#include "common/utils/UtcTime.h"

View File

@ -1,6 +1,7 @@
#include "storage/worker/DumpWorker.h"
#include <gperftools/profiler.h>
#include <fstream>
#include <memory>
#include <sys/times.h>

View File

@ -50,7 +50,7 @@ auto createStorageEventTrace(size_t id) {
ClientId::zero(),
storage::RequestId{id},
storage::UpdateChannel{
.id = storage::ChannelId{id},
.id = (storage::ChannelId)id,
.seqnum = storage::ChannelSeqNum{id},
},
},

View File

@ -375,7 +375,7 @@ bool UnitTestFabric::setUpStorageSystem() {
if (!setupConfig_.start_storage_server()) {
for (auto &[key, value] : storageEndpoints) {
nodeEndpoints[storage::NodeId{std::stoul(key)}] = value;
nodeEndpoints[(storage::NodeId)std::stoul(key)] = value;
}
} else {
for (uint32_t nodeIndex = 0; nodeIndex < numStorageNodes; nodeIndex++) {

View File

@ -12,6 +12,7 @@
#include <folly/logging/Logger.h>
#include <folly/logging/LoggerDB.h>
#include <folly/logging/xlog.h>
#include <fstream>
#include <gtest/gtest.h>
#include <string_view>
#include <sys/stat.h>