mirror of
https://github.com/deepseek-ai/3FS
synced 2025-06-26 18:16:45 +00:00
Initial commit
This commit is contained in:
1
src/analytics/CMakeLists.txt
Normal file
1
src/analytics/CMakeLists.txt
Normal file
@@ -0,0 +1 @@
|
||||
# Declares the `analytics` target through the project's `target_add_lib` helper.
# NOTE(review): `common` and `apache_arrow_static` are presumably the libraries
# it links against — confirm against the helper's definition.
target_add_lib(analytics common apache_arrow_static)
|
||||
65
src/analytics/Common.h
Normal file
65
src/analytics/Common.h
Normal file
@@ -0,0 +1,65 @@
|
||||
#pragma once
|
||||
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/StrongType.h"
|
||||
|
||||
namespace hf3fs::serde {
|
||||
|
||||
template <typename T>
|
||||
using SerdeToReadableMemberMethodReturnType = std::invoke_result_t<decltype(&T::serdeToReadable), T>;
|
||||
|
||||
template <typename T>
|
||||
using SerdeToMemberMethodReturnType = std::invoke_result_t<decltype(&T::serdeTo), T>;
|
||||
|
||||
template <typename T>
|
||||
using SerdeToReadableReturnType = std::invoke_result_t<decltype(serde::SerdeMethod<T>::serdeToReadable), const T &>;
|
||||
|
||||
template <typename T>
|
||||
using SerdeToReturnType = std::invoke_result_t<decltype(serde::SerdeMethod<T>::serdeTo), const T &>;
|
||||
|
||||
template <typename T>
|
||||
concept ConvertibleToString = std::is_convertible_v<T, std::string>;
|
||||
|
||||
template <typename T>
|
||||
concept WithReadableSerdeMemberMethod =
|
||||
!StrongTyped<T> && !ConvertibleToString<T> && requires(const T &t, SerdeToReadableMemberMethodReturnType<T> s) {
|
||||
{ t.serdeToReadable() } -> std::convertible_to<SerdeToReadableMemberMethodReturnType<T>>;
|
||||
{ T::serdeFromReadable(s) } -> std::convertible_to<Result<T>>;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
concept WithReadableSerdeMethod = !StrongTyped<T> && !ConvertibleToString<T> && !WithReadableSerdeMemberMethod<T> &&
|
||||
requires(const T &t, SerdeToReadableReturnType<T> s) {
|
||||
{ serde::SerdeMethod<T>::serdeToReadable(t) } -> std::convertible_to<SerdeToReadableReturnType<T>>;
|
||||
{ serde::SerdeMethod<T>::serdeFromReadable(s) } -> std::convertible_to<Result<T>>;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
concept WithSerdeMemberMethod =
|
||||
!StrongTyped<T> && !ConvertibleToString<T> && !WithReadableSerdeMemberMethod<T> && !WithReadableSerdeMethod<T> &&
|
||||
requires(const T &t, SerdeToMemberMethodReturnType<T> s) {
|
||||
{ t.serdeTo() } -> std::convertible_to<SerdeToMemberMethodReturnType<T>>;
|
||||
{ T::serdeFrom(s) } -> std::convertible_to<Result<T>>;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
concept WithSerdeMethod =
|
||||
!StrongTyped<T> && !ConvertibleToString<T> && !WithReadableSerdeMemberMethod<T> && !WithReadableSerdeMethod<T> &&
|
||||
!WithSerdeMemberMethod<T> && requires(const T &t, SerdeToReturnType<T> s) {
|
||||
{ serde::SerdeMethod<T>::serdeTo(t) } -> std::convertible_to<SerdeToReturnType<T>>;
|
||||
{ serde::SerdeMethod<T>::serdeFrom(s) } -> std::convertible_to<Result<T>>;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
concept SerdeTypeWithoutSpecializedSerdeMethod =
|
||||
serde::SerdeType<T> && !WithReadableSerdeMemberMethod<T> && !WithReadableSerdeMethod<T> &&
|
||||
!WithSerdeMemberMethod<T> && !WithSerdeMethod<T>;
|
||||
|
||||
} // namespace hf3fs::serde
|
||||
|
||||
namespace hf3fs::analytics {

// Suffixes appended to a field's key to name the auxiliary columns that
// accompany variant and result (folly::Expected) fields.
//
// Declared `inline` so that every translation unit including this header
// shares a single object. A plain namespace-scope `const std::string` has
// internal linkage, which gives each translation unit its own copy and makes
// header-defined templates that use it refer to different entities.

/// Suffix of the column holding the index of a variant's active alternative.
inline const std::string kVariantValueIndexColumnSuffix = "ValIdx";

/// Suffix of the column holding the error part of a result value.
inline const std::string kResultErrorTypeColumnSuffix = "Error";

}  // namespace hf3fs::analytics
|
||||
238
src/analytics/SerdeObjectReader.h
Normal file
238
src/analytics/SerdeObjectReader.h
Normal file
@@ -0,0 +1,238 @@
|
||||
#pragma once
|
||||
#include <arrow/io/file.h>
|
||||
#include <optional>
|
||||
#include <parquet/stream_reader.h>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
|
||||
#include "SerdeObjectVisitor.h"
|
||||
#include "SerdeSchemaBuilder.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/Nameof.hpp"
|
||||
#include "common/utils/StrongType.h"
|
||||
#include "common/utils/TypeTraits.h"
|
||||
|
||||
namespace hf3fs::analytics {
|
||||
|
||||
template <serde::SerdeType SerdeType>
|
||||
class SerdeObjectReader : public BaseObjectVisitor<SerdeObjectReader<SerdeType>> {
|
||||
public:
|
||||
SerdeObjectReader(parquet::StreamReader &&reader)
|
||||
: reader_(std::move(reader)) {}
|
||||
|
||||
static std::shared_ptr<SerdeObjectReader> open(const Path path) {
|
||||
// open file
|
||||
auto openStream = arrow::io::ReadableFile::Open(path.string());
|
||||
|
||||
if (!openStream.ok()) {
|
||||
XLOGF(ERR, "Failed to open file input stream: {}, error: {}", path.string(), openStream.status().message());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<arrow::io::ReadableFile> infile;
|
||||
PARQUET_ASSIGN_OR_THROW(infile, openStream);
|
||||
|
||||
try {
|
||||
parquet::StreamReader streamReader{parquet::ParquetFileReader::Open(infile)};
|
||||
return std::make_shared<SerdeObjectReader>(std::move(streamReader));
|
||||
} catch (const std::exception &ex) {
|
||||
XLOGF(ERR, "Failed to create stream reader: {}, error: {}", path.string(), ex.what());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
SerdeObjectReader &operator>>(SerdeType &v) {
|
||||
eof_ = eof();
|
||||
if (!bool(*this)) return *this;
|
||||
|
||||
try {
|
||||
visit("", v);
|
||||
reader_ >> parquet::EndRow;
|
||||
} catch (const parquet::ParquetException &ex) {
|
||||
XLOGF(CRITICAL, "Failed to read from parquet file, error: {}", ex.what());
|
||||
isOk_ = false;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
operator bool() const { return ok() && !eof_; }
|
||||
|
||||
bool ok() const { return isOk_; }
|
||||
|
||||
bool eof() const { return reader_.eof(); }
|
||||
|
||||
size_t numRows() const { return reader_.num_rows(); }
|
||||
|
||||
public:
|
||||
// default
|
||||
template <typename T>
|
||||
void visit(std::string_view k, T &v) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires std::is_arithmetic_v<T>
|
||||
void visit(std::string_view k, T &v) {
|
||||
reader_ >> v;
|
||||
XLOGF(DBG3, "arithmetic visit({}): {}", k, v);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires std::is_enum_v<T>
|
||||
void visit(std::string_view k, T &v) {
|
||||
int32_t n;
|
||||
reader_ >> n;
|
||||
XLOGF(DBG3, "enum visit({}): {}", k, n);
|
||||
auto result = magic_enum::enum_cast<T>(n);
|
||||
if (result) {
|
||||
v = *result;
|
||||
} else {
|
||||
XLOGF(CRITICAL, "Failed to parse enum {} from value: {}", nameof::nameof_short_type<T>(), n);
|
||||
}
|
||||
}
|
||||
|
||||
template <serde::ConvertibleToString T>
|
||||
void visit(std::string_view k, T &v) {
|
||||
reader_ >> v;
|
||||
XLOGF(DBG3, "string visit({}): {}", k, v);
|
||||
}
|
||||
|
||||
template <StrongTyped T>
|
||||
void visit(std::string_view k, T &v) {
|
||||
XLOGF(DBG3, "strongtyped visit({})", k);
|
||||
BaseObjectVisitor<SerdeObjectReader>::visit(k, v);
|
||||
}
|
||||
|
||||
template <serde::WithReadableSerdeMethod T>
|
||||
void visit(std::string_view k, T &val) {
|
||||
XLOGF(DBG3, "WithReadableSerdeMethod visit({})", k);
|
||||
typename serde::SerdeToReadableReturnType<T> serialized;
|
||||
visit(k, serialized);
|
||||
auto result = serde::SerdeMethod<T>::serdeFromReadable(serialized);
|
||||
if (result) {
|
||||
val = *result;
|
||||
} else {
|
||||
XLOGF(CRITICAL, "Failed to parse {} from value: {}", nameof::nameof_short_type<T>(), serialized);
|
||||
}
|
||||
}
|
||||
|
||||
template <serde::WithSerdeMethod T>
|
||||
void visit(std::string_view k, T &val) {
|
||||
XLOGF(DBG3, "WithSerdeMethod visit({})", k);
|
||||
typename serde::SerdeToReturnType<T> serialized;
|
||||
visit(k, serialized);
|
||||
auto result = serde::SerdeMethod<T>::serdeFrom(serialized);
|
||||
if (result) {
|
||||
val = *result;
|
||||
} else {
|
||||
XLOGF(CRITICAL, "Failed to parse {} from value: {}", nameof::nameof_short_type<T>(), serialized);
|
||||
}
|
||||
}
|
||||
|
||||
template <serde::WithReadableSerdeMemberMethod T>
|
||||
void visit(std::string_view k, T &val) {
|
||||
XLOGF(DBG3, "WithReadableSerdeMemberMethod visit({})", k);
|
||||
serde::SerdeToReadableMemberMethodReturnType<T> serialized;
|
||||
visit(k, serialized);
|
||||
auto result = T::serdeFromReadable(serialized);
|
||||
if (result) {
|
||||
val = *result;
|
||||
} else {
|
||||
XLOGF(CRITICAL, "Failed to parse {} from value: {}", nameof::nameof_short_type<T>(), serialized);
|
||||
}
|
||||
}
|
||||
|
||||
template <serde::WithSerdeMemberMethod T>
|
||||
void visit(std::string_view k, T &val) {
|
||||
XLOGF(DBG3, "WithSerdeMemberMethod visit({})", k);
|
||||
serde::SerdeToReadableMemberMethodReturnType<T> serialized;
|
||||
visit(k, serialized);
|
||||
auto result = T::serdeFromReadable(serialized);
|
||||
if (result) {
|
||||
val = *result;
|
||||
} else {
|
||||
XLOGF(CRITICAL, "Failed to parse {} from value: {}", nameof::nameof_short_type<T>(), serialized);
|
||||
}
|
||||
}
|
||||
|
||||
template <serde::SerdeTypeWithoutSpecializedSerdeMethod T>
|
||||
void visit(std::string_view k, T &val) {
|
||||
XLOGF(DBG3, "serdetype visit({})", k);
|
||||
BaseObjectVisitor<SerdeObjectReader>::visit(k, val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_specialization_of_v<T, folly::Expected>
|
||||
void visit(std::string_view k, T &val) {
|
||||
XLOGF(DBG3, "result visit({})", k);
|
||||
std::string errorColumnName = std::string{k} + kResultErrorTypeColumnSuffix;
|
||||
|
||||
Status status(StatusCode::kOK);
|
||||
typename T::value_type value;
|
||||
visit<typename T::error_type>(errorColumnName, status);
|
||||
visit<typename T::value_type>(k, value);
|
||||
|
||||
if (status.isOK()) {
|
||||
val = std::move(value);
|
||||
} else {
|
||||
val = makeError(status);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_variant_v<T>
|
||||
void visit(std::string_view k, T &val) {
|
||||
XLOGF(DBG3, "variant visit({})", k);
|
||||
// get the index of value
|
||||
std::string valIdxColumnName = std::string{k} + kVariantValueIndexColumnSuffix;
|
||||
uint32_t valIdx = 0;
|
||||
visit<uint32_t>(valIdxColumnName, valIdx);
|
||||
|
||||
// read and set the value
|
||||
uint32_t altIdx = 0;
|
||||
visitVariant(val, [&](std::string_view typeName, auto &&v) {
|
||||
std::string altTypeName = std::string{k} + std::string{typeName};
|
||||
std::remove_reference_t<decltype(v)> alt;
|
||||
visit(altTypeName, alt);
|
||||
if (altIdx == valIdx) val = std::move(alt);
|
||||
altIdx++;
|
||||
});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_vector_v<T> || is_set_v<T>
|
||||
void visit(std::string_view k, T &val) {
|
||||
std::string str;
|
||||
reader_ >> str;
|
||||
XLOGF(DBG3, "container visit({}): {}", k, str);
|
||||
auto result = serde::fromJsonString(val, str);
|
||||
if (!result) {
|
||||
XLOGF(CRITICAL, "Failed to parse {} from json string: {}", nameof::nameof_short_type<T>(), str);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_optional_v<T>
|
||||
void visit(std::string_view k, T &val) {
|
||||
std::string str;
|
||||
reader_ >> str;
|
||||
XLOGF(DBG3, "container visit({}): {}", k, str);
|
||||
if (str.empty()) {
|
||||
val = std::nullopt;
|
||||
} else {
|
||||
using ValueType = typename T::value_type;
|
||||
val = ValueType();
|
||||
auto result = serde::fromJsonString(*val, str);
|
||||
if (!result) {
|
||||
XLOGF(CRITICAL, "Failed to parse {} from json string: {}", nameof::nameof_short_type<ValueType>(), str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
parquet::StreamReader reader_;
|
||||
bool isOk_{true};
|
||||
bool eof_{false};
|
||||
};
|
||||
|
||||
} // namespace hf3fs::analytics
|
||||
129
src/analytics/SerdeObjectVisitor.h
Normal file
129
src/analytics/SerdeObjectVisitor.h
Normal file
@@ -0,0 +1,129 @@
|
||||
#pragma once
|
||||
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
|
||||
#include "Common.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/Nameof.hpp"
|
||||
#include "common/utils/StrongType.h"
|
||||
#include "common/utils/TypeTraits.h"
|
||||
|
||||
namespace hf3fs::analytics {
|
||||
|
||||
class ObjectVisitor {
|
||||
public:
|
||||
// default
|
||||
template <typename T>
|
||||
void visit(std::string_view, T &) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires std::is_arithmetic_v<T>
|
||||
void visit(std::string_view, T &) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires std::is_enum_v<T>
|
||||
void visit(std::string_view, T &) = delete;
|
||||
|
||||
template <StrongTyped T>
|
||||
void visit(std::string_view, T &) = delete;
|
||||
|
||||
template <serde::ConvertibleToString T>
|
||||
void visit(std::string_view, T &) = delete;
|
||||
|
||||
template <serde::SerdeTypeWithoutSpecializedSerdeMethod T>
|
||||
void visit(std::string_view, T &) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires is_variant_v<T>
|
||||
void visit(std::string_view, T &) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires is_vector_v<T> || is_set_v<T>
|
||||
void visit(std::string_view, T &) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires is_optional_v<T>
|
||||
void visit(std::string_view, T &) = delete;
|
||||
};
|
||||
|
||||
template <size_t I = 0>
|
||||
inline void visitVariant(auto &&t, auto &&func) {
|
||||
using T = std::decay_t<decltype(t)>;
|
||||
using S = std::variant_alternative_t<I, T>;
|
||||
if (t.index() == I) {
|
||||
func(nameof::nameof_short_type<S>(), std::get<I>(t));
|
||||
} else {
|
||||
func(nameof::nameof_short_type<S>(), std::variant_alternative_t<I, T>{});
|
||||
}
|
||||
if constexpr (I + 1 < std::variant_size_v<T>) {
|
||||
visitVariant<I + 1>(t, func);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Derived>
|
||||
class BaseObjectVisitor : public ObjectVisitor {
|
||||
public:
|
||||
template <typename T>
|
||||
void visit(std::string_view k, T &) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires std::is_arithmetic_v<T>
|
||||
void visit(std::string_view k, T &v) {
|
||||
XLOGF(DBG3, "arithmetic visit({})", k);
|
||||
static_cast<Derived *>(this)->template visit<T>(k, v);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires std::is_enum_v<T>
|
||||
void visit(std::string_view k, T &) = delete;
|
||||
|
||||
template <serde::ConvertibleToString T>
|
||||
void visit(std::string_view k, T &) = delete;
|
||||
|
||||
template <StrongTyped T>
|
||||
void visit(std::string_view k, T &v) {
|
||||
XLOGF(DBG3, "strongtyped visit({})", k);
|
||||
static_cast<Derived *>(this)->template visit<typename T::UnderlyingType>(k, v);
|
||||
}
|
||||
|
||||
template <serde::SerdeTypeWithoutSpecializedSerdeMethod T>
|
||||
void visit(std::string_view k, T &val) {
|
||||
XLOGF(DBG3, "serdetype visit({})", k);
|
||||
refl::Helper::iterate<T>(
|
||||
[&](auto type) { static_cast<Derived *>(this)->template visit(type.name, val.*type.getter); });
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_variant_v<T>
|
||||
void visit(std::string_view k, T &val) {
|
||||
XLOGF(DBG3, "variant visit({})", k);
|
||||
visitVariant(val, [&](std::string_view typeName, auto &&v) {
|
||||
std::string altTypeName = std::string{k} + std::string{typeName};
|
||||
static_cast<Derived *>(this)->template visit(altTypeName, v);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_vector_v<T> || is_set_v<T>
|
||||
void visit(std::string_view k, T &val) {
|
||||
XLOGF(DBG3, "container visit({})", k);
|
||||
for (auto item : val) {
|
||||
static_cast<Derived *>(this)->template visit(k, item);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_optional_v<T>
|
||||
void visit(std::string_view k, T &val) {
|
||||
XLOGF(DBG3, "optional visit({})", k);
|
||||
using ValueType = typename T::value_type;
|
||||
if (val.has_value()) {
|
||||
static_cast<Derived *>(this)->template visit<ValueType>(k, *val);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace hf3fs::analytics
|
||||
241
src/analytics/SerdeObjectWriter.h
Normal file
241
src/analytics/SerdeObjectWriter.h
Normal file
@@ -0,0 +1,241 @@
|
||||
#pragma once
|
||||
#include <arrow/io/file.h>
|
||||
#include <parquet/stream_writer.h>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
|
||||
#include "SerdeObjectVisitor.h"
|
||||
#include "SerdeSchemaBuilder.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/Nameof.hpp"
|
||||
#include "common/utils/StrongType.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
|
||||
namespace hf3fs::analytics {
|
||||
|
||||
template <serde::SerdeType SerdeType>
|
||||
class SerdeObjectWriter : public BaseObjectVisitor<SerdeObjectWriter<SerdeType>> {
|
||||
public:
|
||||
SerdeObjectWriter(parquet::StreamWriter &&writer)
|
||||
: writer_(std::move(writer)),
|
||||
createTime_(UtcClock::now()) {}
|
||||
|
||||
static std::shared_ptr<SerdeObjectWriter> open(const Path path,
|
||||
const bool append = false,
|
||||
const size_t maxRowGroupLength = 1'000'000,
|
||||
const std::vector<parquet::SortingColumn> &sortedColumns = {}) {
|
||||
// open file
|
||||
auto openStream = arrow::io::FileOutputStream::Open(path.string(), append);
|
||||
|
||||
if (!openStream.ok()) {
|
||||
XLOGF(ERR, "Failed to open file output stream: {}, error: {}", path.string(), openStream.status().message());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<arrow::io::FileOutputStream> outfile;
|
||||
PARQUET_ASSIGN_OR_THROW(outfile, openStream);
|
||||
|
||||
// generate schema
|
||||
SerdeSchemaBuilder<SerdeType> schemaBuilder;
|
||||
auto schemaNode = schemaBuilder.getSchema();
|
||||
|
||||
if (schemaNode == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
parquet::WriterProperties::Builder writerBuilder;
|
||||
writerBuilder.set_sorting_columns(sortedColumns);
|
||||
writerBuilder.max_row_group_length(maxRowGroupLength);
|
||||
writerBuilder.data_page_version(parquet::ParquetDataPageVersion::V2);
|
||||
|
||||
// set global encoding and compression method
|
||||
// writerBuilder.encoding(parquet::Encoding::DELTA_BINARY_PACKED);
|
||||
writerBuilder.compression(parquet::Compression::ZSTD);
|
||||
|
||||
// set encoding for string columns
|
||||
// for (int fieldIndex = 0; fieldIndex < schemaNode->field_count(); fieldIndex++) {
|
||||
// auto fieldNode = schemaNode->field(fieldIndex);
|
||||
// auto fieldType = fieldNode->logical_type();
|
||||
// if (fieldType->is_string()) writerBuilder.encoding(fieldNode->name(),
|
||||
// parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY);
|
||||
// }
|
||||
|
||||
try {
|
||||
auto fileWriter = parquet::ParquetFileWriter::Open(outfile, schemaNode, writerBuilder.build());
|
||||
if (fileWriter == nullptr) {
|
||||
XLOGF(ERR, "Failed to open file writer: {}", path.string());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
parquet::StreamWriter streamWriter(std::move(fileWriter));
|
||||
return std::make_shared<SerdeObjectWriter>(std::move(streamWriter));
|
||||
|
||||
} catch (const std::exception &ex) {
|
||||
XLOGF(ERR, "Failed to create stream writer: {}, error: {}", path.string(), ex.what());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
SerdeObjectWriter &operator<<(const SerdeType &v) {
|
||||
if (!bool(*this)) return *this;
|
||||
|
||||
try {
|
||||
visit("", v);
|
||||
writer_ << parquet::EndRow;
|
||||
} catch (const parquet::ParquetException &ex) {
|
||||
XLOGF(CRITICAL, "Failed to write to parquet file, error: {}", ex.what());
|
||||
isOk_ = false;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
void endRowGroup() { writer_.EndRowGroup(); }
|
||||
|
||||
UtcTime createTime() { return createTime_; }
|
||||
|
||||
operator bool() const { return ok(); }
|
||||
|
||||
bool ok() const { return isOk_; }
|
||||
|
||||
public:
|
||||
// default
|
||||
template <typename T>
|
||||
void visit(std::string_view k, const T &v) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires std::is_arithmetic_v<T>
|
||||
void visit(std::string_view k, const T &v) {
|
||||
XLOGF(DBG3, "arithmetic visit({})", k);
|
||||
writer_ << v;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires std::is_enum_v<T>
|
||||
void visit(std::string_view k, const T &v) {
|
||||
XLOGF(DBG3, "enum visit({})", k);
|
||||
writer_ << (int32_t)v;
|
||||
}
|
||||
|
||||
template <serde::ConvertibleToString T>
|
||||
void visit(std::string_view k, const T &v) {
|
||||
XLOGF(DBG3, "string visit({})", k);
|
||||
writer_ << v;
|
||||
}
|
||||
|
||||
template <StrongTyped T>
|
||||
void visit(std::string_view k, const T &v) {
|
||||
XLOGF(DBG3, "strongtyped visit({})", k);
|
||||
visit<typename T::UnderlyingType>(k, v.toUnderType());
|
||||
}
|
||||
|
||||
template <serde::WithReadableSerdeMethod T>
|
||||
void visit(std::string_view k, const T &val) {
|
||||
auto serialized = serde::SerdeMethod<T>::serdeToReadable(val);
|
||||
XLOGF(DBG3,
|
||||
"WithReadableSerdeMethod visit({}), serialized: {} {}",
|
||||
k,
|
||||
nameof::nameof_type<decltype(serialized)>(),
|
||||
serialized);
|
||||
visit<serde::SerdeToReadableReturnType<T>>(k, serialized);
|
||||
}
|
||||
|
||||
template <serde::WithSerdeMethod T>
|
||||
void visit(std::string_view k, const T &val) {
|
||||
auto serialized = serde::SerdeMethod<T>::serdeTo(val);
|
||||
XLOGF(DBG3,
|
||||
"WithSerdeMethod visit({}), serialized: {} {}",
|
||||
k,
|
||||
nameof::nameof_type<decltype(serialized)>(),
|
||||
serialized);
|
||||
visit(k, serialized);
|
||||
}
|
||||
|
||||
template <serde::WithReadableSerdeMemberMethod T>
|
||||
void visit(std::string_view k, const T &v) {
|
||||
auto serialized = v.serdeToReadable();
|
||||
XLOGF(DBG3,
|
||||
"WithReadableSerdeMemberMethod visit({}), serialized: {} {}",
|
||||
k,
|
||||
nameof::nameof_type<decltype(serialized)>(),
|
||||
serialized);
|
||||
visit(k, serialized);
|
||||
}
|
||||
|
||||
template <serde::WithSerdeMemberMethod T>
|
||||
void visit(std::string_view k, const T &v) {
|
||||
auto serialized = v.serdeTo();
|
||||
XLOGF(DBG3,
|
||||
"WithSerdeMemberMethod visit({}), serialized: {} {}",
|
||||
k,
|
||||
nameof::nameof_type<decltype(serialized)>(),
|
||||
serialized);
|
||||
visit(k, serialized);
|
||||
}
|
||||
|
||||
template <serde::SerdeTypeWithoutSpecializedSerdeMethod T>
|
||||
void visit(std::string_view k, const T &val) {
|
||||
XLOGF(DBG3, "serdetype visit({})", k);
|
||||
BaseObjectVisitor<SerdeObjectWriter>::visit(k, const_cast<T &>(val));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_specialization_of_v<T, folly::Expected>
|
||||
void visit(std::string_view k, const T &val) {
|
||||
XLOGF(DBG3, "result visit({})", k);
|
||||
std::string errorColumnName = std::string{k} + kResultErrorTypeColumnSuffix;
|
||||
|
||||
if (val.hasValue()) {
|
||||
Status ok(StatusCode::kOK);
|
||||
visit<typename T::error_type>(errorColumnName, ok);
|
||||
visit<typename T::value_type>(k, val.value());
|
||||
} else {
|
||||
typename T::value_type value{};
|
||||
visit<typename T::error_type>(errorColumnName, val.error());
|
||||
visit<typename T::value_type>(k, value);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_variant_v<T>
|
||||
void visit(std::string_view k, const T &val) {
|
||||
XLOGF(DBG3, "variant visit({})", k);
|
||||
std::string valIdxColumnName = std::string{k} + kVariantValueIndexColumnSuffix;
|
||||
visit<uint32_t>(valIdxColumnName, val.index());
|
||||
BaseObjectVisitor<SerdeObjectWriter>::visit(k, const_cast<T &>(val));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_vector_v<T> || is_set_v<T>
|
||||
void visit(std::string_view k, const T &val) {
|
||||
XLOGF(DBG3, "container visit({})", k);
|
||||
auto str = serde::toJsonString(val);
|
||||
writer_ << str;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_optional_v<T>
|
||||
void visit(std::string_view k, const T &val) {
|
||||
XLOGF(DBG3, "optional visit({})", k);
|
||||
if (!val.has_value()) {
|
||||
writer_ << "";
|
||||
} else {
|
||||
auto str = serde::toJsonString(*val);
|
||||
writer_ << str;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
parquet::StreamWriter writer_;
|
||||
UtcTime createTime_;
|
||||
bool isOk_{true};
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
SerdeObjectWriter<T> &operator<<(SerdeObjectWriter<T> &writer, parquet::EndRowGroupType) {
|
||||
writer.endRowGroup();
|
||||
return writer;
|
||||
}
|
||||
|
||||
} // namespace hf3fs::analytics
|
||||
219
src/analytics/SerdeSchemaBuilder.h
Normal file
219
src/analytics/SerdeSchemaBuilder.h
Normal file
@@ -0,0 +1,219 @@
|
||||
#pragma once
|
||||
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <numeric>
|
||||
#include <parquet/exception.h>
|
||||
#include <parquet/schema.h>
|
||||
|
||||
#include "SerdeStructVisitor.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/Result.h"
|
||||
#include "common/utils/TypeTraits.h"
|
||||
|
||||
namespace hf3fs::analytics {
|
||||
|
||||
template <serde::SerdeType SerdeType>
|
||||
class SerdeSchemaBuilder : public BaseStructVisitor<SerdeSchemaBuilder<SerdeType>> {
|
||||
public:
|
||||
std::shared_ptr<parquet::schema::GroupNode> getSchema() {
|
||||
try {
|
||||
fields_.clear();
|
||||
fieldNameParts_.clear();
|
||||
this->visit<SerdeType>("");
|
||||
return std::static_pointer_cast<parquet::schema::GroupNode>(
|
||||
parquet::schema::GroupNode::Make("schema", parquet::Repetition::REQUIRED, fields_));
|
||||
} catch (const parquet::ParquetException &ex) {
|
||||
XLOGF(CRITICAL, "Failed to build schema of type {}, error: {}", nameof::nameof_full_type<SerdeType>(), ex.what());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
// default
|
||||
template <typename T>
|
||||
void visit(std::string_view k) = delete;
|
||||
|
||||
template <>
|
||||
void visit<bool>(std::string_view k) {
|
||||
XLOGF(DBG3, "bool visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
fields_.push_back(parquet::schema::PrimitiveNode::Make(getFieldFullName(k),
|
||||
parquet::Repetition::REQUIRED,
|
||||
parquet::LogicalType::None(),
|
||||
parquet::Type::BOOLEAN));
|
||||
}
|
||||
|
||||
template <>
|
||||
void visit<int16_t>(std::string_view k) {
|
||||
XLOGF(DBG3, "int16_t visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
fields_.push_back(parquet::schema::PrimitiveNode::Make(getFieldFullName(k),
|
||||
parquet::Repetition::REQUIRED,
|
||||
parquet::LogicalType::Int(16, true),
|
||||
parquet::Type::INT32));
|
||||
}
|
||||
|
||||
template <>
|
||||
void visit<uint16_t>(std::string_view k) {
|
||||
XLOGF(DBG3, "uint16_t visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
fields_.push_back(parquet::schema::PrimitiveNode::Make(getFieldFullName(k),
|
||||
parquet::Repetition::REQUIRED,
|
||||
parquet::LogicalType::Int(16, false),
|
||||
parquet::Type::INT32));
|
||||
}
|
||||
|
||||
template <>
|
||||
void visit<int32_t>(std::string_view k) {
|
||||
XLOGF(DBG3, "int32_t visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
fields_.push_back(parquet::schema::PrimitiveNode::Make(getFieldFullName(k),
|
||||
parquet::Repetition::REQUIRED,
|
||||
parquet::LogicalType::Int(32, true),
|
||||
parquet::Type::INT32));
|
||||
}
|
||||
|
||||
template <>
|
||||
void visit<uint32_t>(std::string_view k) {
|
||||
XLOGF(DBG3, "uint32_t visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
fields_.push_back(parquet::schema::PrimitiveNode::Make(getFieldFullName(k),
|
||||
parquet::Repetition::REQUIRED,
|
||||
parquet::LogicalType::Int(32, false),
|
||||
parquet::Type::INT32));
|
||||
}
|
||||
|
||||
template <>
|
||||
void visit<int64_t>(std::string_view k) {
|
||||
XLOGF(DBG3, "int64_t visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
fields_.push_back(parquet::schema::PrimitiveNode::Make(getFieldFullName(k),
|
||||
parquet::Repetition::REQUIRED,
|
||||
parquet::LogicalType::Int(64, true),
|
||||
parquet::Type::INT64));
|
||||
}
|
||||
|
||||
template <>
|
||||
void visit<uint64_t>(std::string_view k) {
|
||||
XLOGF(DBG3, "uint64_t visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
fields_.push_back(parquet::schema::PrimitiveNode::Make(getFieldFullName(k),
|
||||
parquet::Repetition::REQUIRED,
|
||||
parquet::LogicalType::Int(64, false),
|
||||
parquet::Type::INT64));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires std::is_enum_v<T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "enum visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
fields_.push_back(parquet::schema::PrimitiveNode::Make(getFieldFullName(k),
|
||||
parquet::Repetition::REQUIRED,
|
||||
parquet::LogicalType::Int(32, true),
|
||||
parquet::Type::INT32));
|
||||
}
|
||||
|
||||
template <serde::ConvertibleToString T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "string visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
fields_.push_back(parquet::schema::PrimitiveNode::Make(getFieldFullName(k),
|
||||
parquet::Repetition::REQUIRED,
|
||||
parquet::LogicalType::String(),
|
||||
parquet::Type::BYTE_ARRAY));
|
||||
}
|
||||
|
||||
template <StrongTyped T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "strongtyped visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
BaseStructVisitor<SerdeSchemaBuilder>::template visit<T>(k);
|
||||
}
|
||||
|
||||
template <serde::WithReadableSerdeMethod T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "WithReadableSerdeMethod visit({})", k);
|
||||
visit<serde::SerdeToReadableReturnType<T>>(k);
|
||||
}
|
||||
|
||||
template <serde::WithSerdeMethod T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "WithSerdeMethod visit({})", k);
|
||||
visit<serde::SerdeToReturnType<T>>(k);
|
||||
}
|
||||
|
||||
template <serde::WithReadableSerdeMemberMethod T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "WithReadableSerdeMemberMethod visit({})", k);
|
||||
visit<serde::SerdeToReadableMemberMethodReturnType<T>>(k);
|
||||
}
|
||||
|
||||
template <serde::WithSerdeMemberMethod T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "WithSerdeMemberMethod visit({})", k);
|
||||
visit<serde::SerdeToMemberMethodReturnType<T>>(k);
|
||||
}
|
||||
|
||||
template <serde::SerdeTypeWithoutSpecializedSerdeMethod T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "serdetype visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
if (!k.empty()) fieldNameParts_.push_back(filterOutInvalidChars(k));
|
||||
BaseStructVisitor<SerdeSchemaBuilder>::template visit<T>(k);
|
||||
if (!k.empty()) fieldNameParts_.pop_back();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_specialization_of_v<T, folly::Expected>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "result visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
std::string errorColumnName = std::string{k} + kResultErrorTypeColumnSuffix;
|
||||
visit<typename T::error_type>(errorColumnName);
|
||||
visit<typename T::value_type>(k);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_variant_v<T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "variant visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
std::string valIdxColumnName = std::string{k} + kVariantValueIndexColumnSuffix;
|
||||
visit<uint32_t>(valIdxColumnName);
|
||||
BaseStructVisitor<SerdeSchemaBuilder>::template visit<T>(k);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_vector_v<T> || is_set_v<T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "container visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
fields_.push_back(parquet::schema::PrimitiveNode::Make(getFieldFullName(k),
|
||||
parquet::Repetition::REQUIRED,
|
||||
parquet::LogicalType::String(),
|
||||
parquet::Type::BYTE_ARRAY));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_optional_v<T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "container visit({}), fullname: '{}'", k, getFieldFullName(k));
|
||||
fields_.push_back(parquet::schema::PrimitiveNode::Make(getFieldFullName(k),
|
||||
parquet::Repetition::REQUIRED,
|
||||
parquet::LogicalType::String(),
|
||||
parquet::Type::BYTE_ARRAY));
|
||||
}
|
||||
|
||||
private:
|
||||
// Builds the full column name for field `k`: all name parts currently on
// fieldNameParts_ followed by the sanitized `k`, joined with '_'.
//
// A separator is only inserted when the text accumulated so far is non-empty,
// so empty leading parts (e.g. a key consisting solely of filtered-out
// characters) do not produce a leading '_'.
//
// The name is assembled in a local string; fieldNameParts_ is only read, so it
// cannot be left with a stray entry if string building throws.
std::string getFieldFullName(std::string_view k) {
  std::string fullName;
  const auto appendPart = [&fullName](const std::string &part) {
    if (!fullName.empty()) fullName += '_';
    fullName += part;
  };

  for (const auto &part : fieldNameParts_) {
    appendPart(part);
  }
  appendPart(filterOutInvalidChars(k));

  return fullName;
}
|
||||
|
||||
// Returns a copy of `k` that keeps only ASCII letters and digits
// ([a-zA-Z0-9]); every other character is dropped. The relative order of the
// kept characters is preserved, and the result may be empty.
std::string filterOutInvalidChars(std::string_view k) {
  std::string filtered;
  filtered.reserve(k.size());  // upper bound: nothing is ever added beyond the input
  for (const char c : k) {
    // Explicit ranges instead of std::isalnum: locale independent and safe for negative char values.
    const bool isAsciiAlnum = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9');
    if (isAsciiAlnum) filtered.push_back(c);
  }
  return filtered;
}
|
||||
|
||||
private:
|
||||
parquet::schema::NodeVector fields_;
|
||||
std::vector<std::string> fieldNameParts_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::analytics
|
||||
123
src/analytics/SerdeStructVisitor.h
Normal file
123
src/analytics/SerdeStructVisitor.h
Normal file
@@ -0,0 +1,123 @@
|
||||
#pragma once
|
||||
|
||||
#include <folly/logging/xlog.h>
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
|
||||
#include "Common.h"
|
||||
#include "common/serde/Serde.h"
|
||||
#include "common/utils/Nameof.hpp"
|
||||
#include "common/utils/StrongType.h"
|
||||
#include "common/utils/TypeTraits.h"
|
||||
|
||||
namespace hf3fs::analytics {
|
||||
|
||||
class StructVisitor {
|
||||
public:
|
||||
// default
|
||||
template <typename T>
|
||||
void visit(std::string_view) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires std::is_arithmetic_v<T>
|
||||
void visit(std::string_view) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires std::is_enum_v<T>
|
||||
void visit(std::string_view) = delete;
|
||||
|
||||
template <StrongTyped T>
|
||||
void visit(std::string_view) = delete;
|
||||
|
||||
template <serde::SerdeTypeWithoutSpecializedSerdeMethod T>
|
||||
void visit(std::string_view) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires is_variant_v<T>
|
||||
void visit(std::string_view) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires is_vector_v<T> || is_set_v<T>
|
||||
void visit(std::string_view) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires is_optional_v<T>
|
||||
void visit(std::string_view) = delete;
|
||||
};
|
||||
|
||||
template <class T, size_t I = 0>
|
||||
inline void visitVariant(auto &&func) {
|
||||
using S = std::variant_alternative_t<I, T>;
|
||||
func(nameof::nameof_short_type<S>(), std::type_identity<S>{});
|
||||
if constexpr (I + 1 < std::variant_size_v<T>) {
|
||||
visitVariant<T, I + 1>(func);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Derived>
|
||||
class BaseStructVisitor : public StructVisitor {
|
||||
public:
|
||||
// default
|
||||
template <typename T>
|
||||
void visit(std::string_view k) = delete;
|
||||
|
||||
template <typename T>
|
||||
requires std::is_arithmetic_v<T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "arithmetic visit({})", k);
|
||||
static_cast<Derived *>(this)->template visit<T>(k);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires std::is_enum_v<T>
|
||||
void visit(std::string_view k) = delete;
|
||||
|
||||
template <serde::ConvertibleToString T>
|
||||
void visit(std::string_view k) = delete;
|
||||
|
||||
template <StrongTyped T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "strongtyped visit({})", k);
|
||||
static_cast<Derived *>(this)->template visit<typename T::UnderlyingType>(k);
|
||||
}
|
||||
|
||||
template <serde::SerdeTypeWithoutSpecializedSerdeMethod T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "serdetype visit({})", k);
|
||||
refl::Helper::iterate<T>([&](auto field) {
|
||||
using FieldType = std::decay_t<decltype(std::declval<T>().*field.getter)>;
|
||||
static_cast<Derived *>(this)->template visit<FieldType>(field.name);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_variant_v<T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "variant visit({})", k);
|
||||
visitVariant<T>([&](std::string_view typeName, auto &&v) {
|
||||
using AlternativeType = typename std::decay_t<decltype(v)>::type;
|
||||
std::string altTypeName = std::string{k} + std::string{typeName};
|
||||
static_cast<Derived *>(this)->template visit<AlternativeType>(altTypeName);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_vector_v<T> || is_set_v<T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "container visit({})", k);
|
||||
using ElemValueType = typename T::value_type;
|
||||
static_cast<Derived *>(this)->template visit<ElemValueType>(k);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_optional_v<T>
|
||||
void visit(std::string_view k) {
|
||||
XLOGF(DBG3, "optional visit({})", k);
|
||||
using ValueType = typename T::value_type;
|
||||
static_cast<Derived *>(this)->template visit<ValueType>(k);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace hf3fs::analytics
|
||||
285
src/analytics/StructuredTraceLog.h
Normal file
285
src/analytics/StructuredTraceLog.h
Normal file
@@ -0,0 +1,285 @@
|
||||
#pragma once
|
||||
|
||||
#include <folly/Random.h>
|
||||
#include <folly/concurrency/UnboundedQueue.h>
|
||||
#include <future>
|
||||
|
||||
#include "SerdeObjectWriter.h"
|
||||
#include "common/monitor/Recorder.h"
|
||||
#include "common/monitor/ScopedMetricsWriter.h"
|
||||
#include "common/utils/ConfigBase.h"
|
||||
#include "common/utils/Path.h"
|
||||
#include "common/utils/SysResource.h"
|
||||
#include "common/utils/UtcTime.h"
|
||||
|
||||
namespace hf3fs::analytics {
|
||||
|
||||
template <serde::SerdeType SerdeType>
|
||||
class StructuredTraceLog : public folly::MoveOnly {
|
||||
struct TraceMeta {
|
||||
SERDE_STRUCT_FIELD(timestamp, std::time_t{});
|
||||
SERDE_STRUCT_FIELD(hostname, String{});
|
||||
};
|
||||
|
||||
struct StructuredTrace {
|
||||
SERDE_STRUCT_FIELD(trace_meta, TraceMeta{});
|
||||
SERDE_STRUCT_FIELD(_, SerdeType{});
|
||||
};
|
||||
|
||||
using WriterType = SerdeObjectWriter<StructuredTrace>;
|
||||
using WriterPtr = std::shared_ptr<WriterType>;
|
||||
|
||||
public:
|
||||
class Config : public hf3fs::ConfigBase<Config> {
|
||||
public:
|
||||
CONFIG_ITEM(trace_file_dir, Path{"."});
|
||||
#ifndef NDEBUG
|
||||
CONFIG_HOT_UPDATED_ITEM(enabled, false);
|
||||
CONFIG_HOT_UPDATED_ITEM(dump_interval, 60_min);
|
||||
#else
|
||||
CONFIG_HOT_UPDATED_ITEM(enabled, true);
|
||||
CONFIG_HOT_UPDATED_ITEM(dump_interval, 30_s);
|
||||
#endif
|
||||
CONFIG_HOT_UPDATED_ITEM(max_num_writers, size_t{1}, ConfigCheckers::checkPositive);
|
||||
CONFIG_HOT_UPDATED_ITEM(max_row_group_length, size_t{100'000});
|
||||
};
|
||||
|
||||
public:
|
||||
StructuredTraceLog(const Config &config)
|
||||
: config_(config),
|
||||
enabled_(config.enabled()),
|
||||
typename_(nameof::nameof_short_type<SerdeType>()),
|
||||
hostname_(SysResource::hostname().value_or("unknown_host")),
|
||||
latencyTagSet_({{"tag", typename_}, {"instance", fmt::to_string(fmt::ptr(this))}}),
|
||||
createLatency_("trace_log.create_latency", latencyTagSet_),
|
||||
appendLatency_("trace_log.append_latency", latencyTagSet_),
|
||||
flushLatency_("trace_log.flush_latency", latencyTagSet_),
|
||||
maxNumWriters_(config.max_num_writers()) {
|
||||
onConfigUpdated_ = config_.addCallbackGuard([this]() {
|
||||
bool enabled = config_.enabled();
|
||||
if (enabled_ != enabled) {
|
||||
enableTraceLog(enabled);
|
||||
if (!enabled) flush(false /*async*/);
|
||||
}
|
||||
|
||||
if (maxNumWriters_ != config_.max_num_writers()) {
|
||||
updateMaxNumWriters(config_.max_num_writers());
|
||||
}
|
||||
});
|
||||
|
||||
uint64_t secsUntilFirstDump =
|
||||
folly::Random::rand64(config_.dump_interval().asSec().count() / 2, config_.dump_interval().asSec().count());
|
||||
nextDumpTime_ = microsecondsSinceEpoch(UtcClock::now() + std::chrono::seconds{secsUntilFirstDump});
|
||||
}
|
||||
|
||||
~StructuredTraceLog() { close(); }
|
||||
|
||||
bool open() {
|
||||
auto writer = getOrCreateWriter();
|
||||
if (!writer) return false;
|
||||
writerPool_.enqueue(writer);
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<SerdeType> newEntry(const SerdeType &init = SerdeType{}) {
|
||||
auto ptr = new SerdeType(init);
|
||||
return std::shared_ptr<SerdeType>(ptr, [this](SerdeType *ptr) {
|
||||
this->append(*ptr);
|
||||
delete ptr;
|
||||
});
|
||||
}
|
||||
|
||||
void append(const SerdeType &msg) {
|
||||
if (!enabled_) return;
|
||||
|
||||
{
|
||||
monitor::ScopedLatencyWriter appendLatency(appendLatency_);
|
||||
StructuredTrace trace{
|
||||
.trace_meta = TraceMeta{.timestamp = UtcClock::secondsSinceEpoch(), .hostname = hostname_},
|
||||
._ = msg,
|
||||
};
|
||||
|
||||
WriterPtr writer = getOrCreateWriter();
|
||||
|
||||
if (UNLIKELY(writer == nullptr)) {
|
||||
XLOGF(CRITICAL, "Cannot get a writer of {} trace log in directory {}", typename_, config_.trace_file_dir());
|
||||
enableTraceLog(false);
|
||||
return;
|
||||
}
|
||||
|
||||
*writer << trace;
|
||||
auto writerOk = writer->ok();
|
||||
writerPool_.enqueue(std::move(writer));
|
||||
if (UNLIKELY(!writerOk)) enableTraceLog(false);
|
||||
}
|
||||
|
||||
auto currentTime = microsecondsSinceEpoch(UtcClock::now());
|
||||
|
||||
if (UNLIKELY(currentTime >= nextDumpTime_)) {
|
||||
nextDumpTime_ = currentTime + config_.dump_interval().asUs().count();
|
||||
flush(true /*async*/);
|
||||
}
|
||||
}
|
||||
|
||||
void flush(bool async, bool shutdown = false) {
|
||||
auto running = dumpingTrace_.test_and_set();
|
||||
if (running) return;
|
||||
|
||||
monitor::ScopedLatencyWriter flushLatency(flushLatency_);
|
||||
if (asyncFlush_.valid()) asyncFlush_.wait();
|
||||
|
||||
asyncFlush_ = std::async(
|
||||
std::launch::async,
|
||||
[this](bool shutdown) {
|
||||
size_t numWritersToClose = numWriters_.load();
|
||||
auto now = UtcClock::now();
|
||||
|
||||
XLOGF(INFO,
|
||||
"Flushing {} {} log writers in directory {}",
|
||||
numWritersToClose,
|
||||
typename_,
|
||||
config_.trace_file_dir());
|
||||
|
||||
for (size_t i = 0; numWritersToClose > 0; i++) {
|
||||
// give up flushing old writers after trying for too many loops
|
||||
if (i >= 10 * maxNumWriters_) {
|
||||
break;
|
||||
}
|
||||
|
||||
auto writer = writerPool_.dequeue();
|
||||
if (!writer) continue;
|
||||
|
||||
if (writer->createTime() > now) {
|
||||
writerPool_.enqueue(writer);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (writer->ok()) {
|
||||
// add an empty trace at the end of log
|
||||
*writer << StructuredTrace{
|
||||
.trace_meta = {.timestamp = UtcClock::secondsSinceEpoch(), .hostname = hostname_}};
|
||||
}
|
||||
|
||||
try {
|
||||
writer.reset();
|
||||
} catch (const std::exception &ex) {
|
||||
XLOGF(ERR,
|
||||
"Failed to close {} log writer in directory {}, error: {}",
|
||||
typename_,
|
||||
config_.trace_file_dir(),
|
||||
ex.what());
|
||||
}
|
||||
|
||||
if (shutdown)
|
||||
numWriters_--;
|
||||
else
|
||||
writerPool_.enqueue(createNewWriter());
|
||||
|
||||
numWritersToClose--;
|
||||
}
|
||||
|
||||
if (numWritersToClose > 0) {
|
||||
XLOGF(WARN,
|
||||
"Still have {} {} log writers not closed in directory {}",
|
||||
numWritersToClose,
|
||||
typename_,
|
||||
config_.trace_file_dir());
|
||||
} else {
|
||||
XLOGF(INFO, "Flushed {} trace log in directory {}", typename_, config_.trace_file_dir());
|
||||
}
|
||||
},
|
||||
shutdown);
|
||||
|
||||
if (!async) asyncFlush_.wait();
|
||||
dumpingTrace_.clear();
|
||||
}
|
||||
|
||||
void close() {
|
||||
enableTraceLog(false);
|
||||
flush(false /*async*/, true /*shutdown*/);
|
||||
XLOGF(INFO, "Closed {} trace log in directory {}", typename_, config_.trace_file_dir());
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t microsecondsSinceEpoch(const UtcTime &time) const {
|
||||
return std::chrono::duration_cast<std::chrono::microseconds>((time).time_since_epoch()).count();
|
||||
}
|
||||
|
||||
WriterPtr getOrCreateWriter() {
|
||||
WriterPtr writer;
|
||||
if (writerPool_.try_dequeue(writer)) return writer;
|
||||
|
||||
auto currentNumWriters = numWriters_.load();
|
||||
if (currentNumWriters < maxNumWriters_) {
|
||||
bool create = numWriters_.compare_exchange_strong(currentNumWriters, currentNumWriters + 1);
|
||||
if (create) return createNewWriter();
|
||||
}
|
||||
|
||||
return writerPool_.dequeue();
|
||||
}
|
||||
|
||||
WriterPtr createNewWriter() {
|
||||
monitor::ScopedLatencyWriter createLatency(createLatency_);
|
||||
auto timestamp = fmt::localtime(UtcClock::to_time_t(UtcClock::now()));
|
||||
Path logfilePath =
|
||||
config_.trace_file_dir() / Path{fmt::format("{:%Y-%m-%d}", timestamp)} / Path{hostname_} /
|
||||
Path{
|
||||
fmt::format("{}.{}.{:%Y-%m-%d-%H-%M-%S}.{}.parquet", typename_, hostname_, timestamp, nextLogFileIndex_++)};
|
||||
|
||||
if (!boost::filesystem::exists(logfilePath.parent_path())) {
|
||||
boost::system::error_code err{};
|
||||
boost::filesystem::create_directories(logfilePath.parent_path(), err);
|
||||
if (UNLIKELY(err.failed())) {
|
||||
XLOGF(CRITICAL, "Failed to create directory {}, error: {}", logfilePath.parent_path(), err.message());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
XLOGF(INFO, "Opening {} trace log: {}", typename_, logfilePath);
|
||||
return WriterType::open(logfilePath, false /*append*/, config_.max_row_group_length());
|
||||
}
|
||||
|
||||
void enableTraceLog(bool enable) {
|
||||
enabled_ = enable;
|
||||
XLOGF(INFO,
|
||||
"{} {} trace log in directory {}",
|
||||
enable ? "Enabled" : "Disabled",
|
||||
typename_,
|
||||
config_.trace_file_dir());
|
||||
}
|
||||
|
||||
void updateMaxNumWriters(size_t newMaxNumWriters) {
|
||||
XLOGF(INFO,
|
||||
"Update max num of writers from {} to {} for {} trace log in directory {}",
|
||||
maxNumWriters_.load(),
|
||||
newMaxNumWriters,
|
||||
typename_,
|
||||
config_.trace_file_dir());
|
||||
bool doFlush = maxNumWriters_ > newMaxNumWriters;
|
||||
maxNumWriters_ = newMaxNumWriters;
|
||||
if (doFlush) flush(false /*async*/);
|
||||
}
|
||||
|
||||
private:
|
||||
const Config &config_;
|
||||
bool enabled_ = false;
|
||||
const std::string typename_;
|
||||
const std::string hostname_;
|
||||
|
||||
const monitor::TagSet latencyTagSet_;
|
||||
monitor::LatencyRecorder createLatency_;
|
||||
monitor::LatencyRecorder appendLatency_;
|
||||
monitor::LatencyRecorder flushLatency_;
|
||||
|
||||
std::unique_ptr<ConfigCallbackGuard> onConfigUpdated_;
|
||||
std::atomic_size_t maxNumWriters_;
|
||||
std::atomic_size_t numWriters_ = 0;
|
||||
std::atomic_size_t nextLogFileIndex_ = 1;
|
||||
folly::UnboundedQueue<WriterPtr, false, false, true> writerPool_;
|
||||
|
||||
std::atomic_uint64_t nextDumpTime_;
|
||||
std::atomic_flag dumpingTrace_;
|
||||
std::future<void> asyncFlush_;
|
||||
};
|
||||
|
||||
} // namespace hf3fs::analytics
|
||||
Reference in New Issue
Block a user