From cc896d6497a47bb0bc33f4bdf44d713778fc1649 Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sun, 23 Jun 2024 17:38:14 +0800 Subject: [PATCH] perf: ShortCode conversion speed --- src/core/CMakeLists.txt | 2 +- src/core/benchmark/codec.cc | 36 ++++++++++++++++++--- src/core/short_code/internal/convert.cc | 36 +++++++++++++++++---- src/core/short_code/internal/short_code.inl | 27 +++++++++++++--- src/core/short_code/short_code.h | 11 ++++++- 5 files changed, 95 insertions(+), 17 deletions(-) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f765d74..49e1e6e 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -42,7 +42,7 @@ add_executable(klotski_core_bin main.cc) target_link_libraries(klotski_core_bin PRIVATE klotski_core) add_executable(codec_benchmark benchmark/codec.cc) -target_compile_options(codec_benchmark PRIVATE -fno-rtti -fno-exceptions) +target_compile_options(codec_benchmark PRIVATE -fno-rtti -fno-exceptions -fno-access-control) target_link_libraries(codec_benchmark PRIVATE klotski::core benchmark::benchmark_main) add_executable(group_benchmark benchmark/group.cc) diff --git a/src/core/benchmark/codec.cc b/src/core/benchmark/codec.cc index 60da97c..c19307e 100644 --- a/src/core/benchmark/codec.cc +++ b/src/core/benchmark/codec.cc @@ -2,11 +2,14 @@ #include -#define private public +// #define private public #include "group/group.h" #include "all_cases/all_cases.h" #include "common_code/common_code.h" -#undef private +// #undef private + +using klotski::codec::ShortCode; +using klotski::codec::CommonCode; using klotski::cases::AllCases; using klotski::codec::CommonCode; @@ -192,13 +195,38 @@ static void ShortCodeDeserialize(benchmark::State &state) { } +static void ShortCodeToCommonCode(benchmark::State &state) { + + // ShortCode::speed_up(true); + ShortCode::speed_up(false); + + // ShortCode::fast_decode(4091296); + + auto short_code = CommonCode::unsafe_create(0x1A9BF0C00).to_short_code(); + + for (auto _ : state) { + + volatile auto kk = short_code.to_common_code(); + + // if (AllCases::instance().is_available()) { + // if (ShortCode::stage_ == ShortCode::Stage::FAST) { + // volatile auto pp = ShortCode::fast_decode(4091296); + // } + // } + + } + +} + // BENCHMARK(CommonCodeSerialize)->Range(8, 256); // BENCHMARK(CommonCodeDeserialize)->Range(8, 256); // BENCHMARK(CommonCodeSerializeShorten)->Range(8, 256); // BENCHMARK(CommonCodeDeserializeShorten)->Range(8, 256); -BENCHMARK(ShortCodeSerialize)->Range(8, 256); -BENCHMARK(ShortCodeDeserialize)->Range(8, 256); +// BENCHMARK(ShortCodeSerialize)->Range(8, 256); +// BENCHMARK(ShortCodeDeserialize)->Range(8, 256); + +BENCHMARK(ShortCodeToCommonCode); // static void CommonCodeDecode(benchmark::State &state) { // const auto tmp = str_common_codes(state.range(0)); diff --git a/src/core/short_code/internal/convert.cc b/src/core/short_code/internal/convert.cc index 428ed21..2e6f4d9 100644 --- a/src/core/short_code/internal/convert.cc +++ b/src/core/short_code/internal/convert.cc @@ -50,6 +50,15 @@ static uint32_t check_range(uint32_t head, uint32_t range) noexcept { return 0; // pass check } +std::mutex ShortCode::busy_ {}; + +// ShortCode::Stage ShortCode::stage_ = Stage::UNINIT; + +// const klotski::cases::RangesUnion *ShortCode::cases_ = &AllCases::instance().fetch(); +const klotski::cases::RangesUnion *ShortCode::cases_ = nullptr; + +const klotski::cases::Ranges *ShortCode::ranges_ = nullptr; + uint32_t ShortCode::fast_encode(uint64_t common_code) { auto head = common_code >> 32; auto &ranges = AllCases::instance().fetch()[head]; // match available ranges @@ -60,7 +69,8 @@ uint32_t ShortCode::fast_encode(uint64_t common_code) { uint64_t ShortCode::fast_decode(uint32_t short_code) { auto offset = std::upper_bound(ALL_CASES_OFFSET.begin(), ALL_CASES_OFFSET.end(), short_code) - 1; uint64_t head = offset - ALL_CASES_OFFSET.begin(); - return (head << 32) | AllCases::instance().fetch()[head][short_code - *offset]; + // return (head << 32) | AllCases::instance().fetch()[head][short_code - *offset]; + return (head << 32) | (*cases_)[head][short_code - *offset]; } uint32_t ShortCode::tiny_encode(uint64_t common_code) { @@ -89,6 +99,13 @@ uint32_t ShortCode::tiny_encode(uint64_t common_code) { } uint64_t ShortCode::tiny_decode(uint32_t short_code) { // short code --> common code + // speed_up(false); + + // std::lock_guard guard {busy_}; + + ranges_ = &cases::BasicRanges::instance().fetch(); + // stage_ = Stage::TINY; + auto offset_ = std::upper_bound(ALL_CASES_OFFSET.begin(), ALL_CASES_OFFSET.end(), short_code) - 1; auto head = offset_ - ALL_CASES_OFFSET.begin(); // head index short_code -= *offset_; @@ -99,16 +116,21 @@ uint64_t ShortCode::tiny_decode(uint32_t short_code) { // short code --> common /// search for target range auto index = RANGES_GLOBAL_OFFSET[prefix]; - const auto &basic_ranges = BasicRanges::instance().fetch(); - for (; index < basic_ranges.size(); ++index) { // traverse basic ranges - auto broken_offset = check_range(head, range_reverse(basic_ranges[index])); + // auto basic_ranges = ranges_; + // const auto &basic_ranges = BasicRanges::instance().fetch(); + + const auto &ranges = *ranges_; + // const auto &ranges = BasicRanges::instance().fetch(); + + for (; index < ranges.size(); ++index) { // traverse basic ranges + auto broken_offset = check_range(head, range_reverse(ranges[index])); if (!broken_offset && !short_code--) { // valid case -> short code approximate break; } auto delta = (uint32_t)1 << (32 - broken_offset * 2); // delta to next possible range - auto next_min = (basic_ranges[index] & ~(delta - 1)) + delta; - while (basic_ranges[++index] < next_min); // located next range + auto next_min = (ranges[index] & ~(delta - 1)) + delta; + while (ranges[++index] < next_min); // located next range --index; } - return (uint64_t)head << 32 | basic_ranges[index]; + return (uint64_t)head << 32 | ranges[index]; } diff --git a/src/core/short_code/internal/short_code.inl b/src/core/short_code/internal/short_code.inl index b12251c..5f04047 100644 --- a/src/core/short_code/internal/short_code.inl +++ b/src/core/short_code/internal/short_code.inl @@ -39,10 +39,19 @@ inline bool ShortCode::check(const uint32_t short_code) { } inline void ShortCode::speed_up(const bool fast_mode) { + // TODO: keep one way change. if (fast_mode) { - cases::AllCases::instance().build(); + // cases::AllCases::instance().build(); + std::lock_guard guard {busy_}; + cases_ = &cases::AllCases::instance().fetch(); + stage_ = Stage::FAST; } else { - cases::BasicRanges::instance().build(); + std::lock_guard guard {busy_}; + + // TODO: skip if stage_ is FAST + + ranges_ = &cases::BasicRanges::instance().fetch(); + stage_ = Stage::TINY; } } @@ -65,10 +74,20 @@ inline std::string ShortCode::to_string() const { inline CommonCode ShortCode::to_common_code() const { // TODO: test the affect of CPU branch prediction. - if (cases::AllCases::instance().is_available()) { + // if (cases::AllCases::instance().is_available()) { + // return CommonCode::unsafe_create(fast_decode(code_)); + // } + // return CommonCode::unsafe_create(tiny_decode(code_)); + + switch (stage_) { + case Stage::UNINIT: + // TODO: do speed up + // speed_up(false); // FIXME: slow about 3% + case Stage::TINY: + return CommonCode::unsafe_create(tiny_decode(code_)); + case Stage::FAST: return CommonCode::unsafe_create(fast_decode(code_)); } - return CommonCode::unsafe_create(tiny_decode(code_)); } // ----------------------------------------------------------------------------------------- // diff --git a/src/core/short_code/short_code.h b/src/core/short_code/short_code.h index 20e95ef..b39efa5 100644 --- a/src/core/short_code/short_code.h +++ b/src/core/short_code/short_code.h @@ -157,7 +157,7 @@ private: // ------------------------------------------------------------------------------------- // /// Convert ShortCode to CommonCode based on AllCases data. - static uint64_t fast_decode(uint32_t short_code); + static KLSK_INLINE uint64_t fast_decode(uint32_t short_code); /// Convert CommonCode to ShortCode based on AllCases data. static uint32_t fast_encode(uint64_t common_code); @@ -177,6 +177,15 @@ private: static std::optional string_decode(std::string_view short_code); // ------------------------------------------------------------------------------------- // + + enum class Stage { UNINIT, TINY, FAST }; + + static std::mutex busy_; + + static inline auto stage_ {Stage::UNINIT}; + + static const cases::Ranges *ranges_; + static const cases::RangesUnion *cases_; }; } // namespace klotski::codec