Browse Source

perf: ShortCode conversion speed

master
Dnomd343 2 weeks ago
parent
commit
cc896d6497
  1. 2
      src/core/CMakeLists.txt
  2. 36
      src/core/benchmark/codec.cc
  3. 36
      src/core/short_code/internal/convert.cc
  4. 27
      src/core/short_code/internal/short_code.inl
  5. 11
      src/core/short_code/short_code.h

2
src/core/CMakeLists.txt

@ -42,7 +42,7 @@ add_executable(klotski_core_bin main.cc)
target_link_libraries(klotski_core_bin PRIVATE klotski_core) target_link_libraries(klotski_core_bin PRIVATE klotski_core)
add_executable(codec_benchmark benchmark/codec.cc) add_executable(codec_benchmark benchmark/codec.cc)
target_compile_options(codec_benchmark PRIVATE -fno-rtti -fno-exceptions) target_compile_options(codec_benchmark PRIVATE -fno-rtti -fno-exceptions -fno-access-control)
target_link_libraries(codec_benchmark PRIVATE klotski::core benchmark::benchmark_main) target_link_libraries(codec_benchmark PRIVATE klotski::core benchmark::benchmark_main)
add_executable(group_benchmark benchmark/group.cc) add_executable(group_benchmark benchmark/group.cc)

36
src/core/benchmark/codec.cc

@ -2,11 +2,14 @@
#include <benchmark/benchmark.h> #include <benchmark/benchmark.h>
#define private public // #define private public
#include "group/group.h" #include "group/group.h"
#include "all_cases/all_cases.h" #include "all_cases/all_cases.h"
#include "common_code/common_code.h" #include "common_code/common_code.h"
#undef private // #undef private
using klotski::codec::ShortCode;
using klotski::codec::CommonCode;
using klotski::cases::AllCases; using klotski::cases::AllCases;
using klotski::codec::CommonCode; using klotski::codec::CommonCode;
@ -192,13 +195,38 @@ static void ShortCodeDeserialize(benchmark::State &state) {
} }
/// Benchmark the ShortCode -> CommonCode conversion of one fixed layout.
///
/// `ShortCode::speed_up(false)` is invoked once, outside the timed loop, so only
/// `to_common_code()` itself is measured. Change the argument to `true` to
/// measure the conversion after the fast-mode speed up instead.
static void ShortCodeToCommonCode(benchmark::State &state) {
    ShortCode::speed_up(false);

    // Build the input once; the conversion under test starts from a ShortCode.
    auto short_code = CommonCode::unsafe_create(0x1A9BF0C00).to_short_code();

    for (auto _ : state) {
        // Use the benchmark library's sink rather than a `volatile` local, so the
        // result is kept alive without adding an extra store to the timed region.
        benchmark::DoNotOptimize(short_code.to_common_code());
    }
}
// BENCHMARK(CommonCodeSerialize)->Range(8, 256); // BENCHMARK(CommonCodeSerialize)->Range(8, 256);
// BENCHMARK(CommonCodeDeserialize)->Range(8, 256); // BENCHMARK(CommonCodeDeserialize)->Range(8, 256);
// BENCHMARK(CommonCodeSerializeShorten)->Range(8, 256); // BENCHMARK(CommonCodeSerializeShorten)->Range(8, 256);
// BENCHMARK(CommonCodeDeserializeShorten)->Range(8, 256); // BENCHMARK(CommonCodeDeserializeShorten)->Range(8, 256);
BENCHMARK(ShortCodeSerialize)->Range(8, 256); // BENCHMARK(ShortCodeSerialize)->Range(8, 256);
BENCHMARK(ShortCodeDeserialize)->Range(8, 256); // BENCHMARK(ShortCodeDeserialize)->Range(8, 256);
BENCHMARK(ShortCodeToCommonCode);
// static void CommonCodeDecode(benchmark::State &state) { // static void CommonCodeDecode(benchmark::State &state) {
// const auto tmp = str_common_codes(state.range(0)); // const auto tmp = str_common_codes(state.range(0));

36
src/core/short_code/internal/convert.cc

@ -50,6 +50,15 @@ static uint32_t check_range(uint32_t head, uint32_t range) noexcept {
return 0; // pass check return 0; // pass check
} }
// Definitions of the static state shared by all ShortCode conversions.

// Locked by `speed_up()` while it updates the pointers below and `stage_`.
std::mutex ShortCode::busy_;

// Basic ranges data read by `tiny_decode()`; null until `speed_up(false)` or
// `tiny_decode()` stores the address returned by `BasicRanges::instance().fetch()`.
const klotski::cases::Ranges *ShortCode::ranges_ = nullptr;

// All cases data read by `fast_decode()`; null until `speed_up(true)` stores the
// address returned by `AllCases::instance().fetch()` (it is deliberately not
// fetched eagerly at static-initialization time).
const klotski::cases::RangesUnion *ShortCode::cases_ = nullptr;
uint32_t ShortCode::fast_encode(uint64_t common_code) { uint32_t ShortCode::fast_encode(uint64_t common_code) {
auto head = common_code >> 32; auto head = common_code >> 32;
auto &ranges = AllCases::instance().fetch()[head]; // match available ranges auto &ranges = AllCases::instance().fetch()[head]; // match available ranges
@ -60,7 +69,8 @@ uint32_t ShortCode::fast_encode(uint64_t common_code) {
uint64_t ShortCode::fast_decode(uint32_t short_code) { uint64_t ShortCode::fast_decode(uint32_t short_code) {
auto offset = std::upper_bound(ALL_CASES_OFFSET.begin(), ALL_CASES_OFFSET.end(), short_code) - 1; auto offset = std::upper_bound(ALL_CASES_OFFSET.begin(), ALL_CASES_OFFSET.end(), short_code) - 1;
uint64_t head = offset - ALL_CASES_OFFSET.begin(); uint64_t head = offset - ALL_CASES_OFFSET.begin();
return (head << 32) | AllCases::instance().fetch()[head][short_code - *offset]; // return (head << 32) | AllCases::instance().fetch()[head][short_code - *offset];
return (head << 32) | (*cases_)[head][short_code - *offset];
} }
uint32_t ShortCode::tiny_encode(uint64_t common_code) { uint32_t ShortCode::tiny_encode(uint64_t common_code) {
@ -89,6 +99,13 @@ uint32_t ShortCode::tiny_encode(uint64_t common_code) {
} }
uint64_t ShortCode::tiny_decode(uint32_t short_code) { // short code --> common code uint64_t ShortCode::tiny_decode(uint32_t short_code) { // short code --> common code
// speed_up(false);
// std::lock_guard guard {busy_};
ranges_ = &cases::BasicRanges::instance().fetch();
// stage_ = Stage::TINY;
auto offset_ = std::upper_bound(ALL_CASES_OFFSET.begin(), ALL_CASES_OFFSET.end(), short_code) - 1; auto offset_ = std::upper_bound(ALL_CASES_OFFSET.begin(), ALL_CASES_OFFSET.end(), short_code) - 1;
auto head = offset_ - ALL_CASES_OFFSET.begin(); // head index auto head = offset_ - ALL_CASES_OFFSET.begin(); // head index
short_code -= *offset_; short_code -= *offset_;
@ -99,16 +116,21 @@ uint64_t ShortCode::tiny_decode(uint32_t short_code) { // short code --> common
/// search for target range /// search for target range
auto index = RANGES_GLOBAL_OFFSET[prefix]; auto index = RANGES_GLOBAL_OFFSET[prefix];
const auto &basic_ranges = BasicRanges::instance().fetch(); // auto basic_ranges = ranges_;
for (; index < basic_ranges.size(); ++index) { // traverse basic ranges // const auto &basic_ranges = BasicRanges::instance().fetch();
auto broken_offset = check_range(head, range_reverse(basic_ranges[index]));
const auto &ranges = *ranges_;
// const auto &ranges = BasicRanges::instance().fetch();
for (; index < ranges.size(); ++index) { // traverse basic ranges
auto broken_offset = check_range(head, range_reverse(ranges[index]));
if (!broken_offset && !short_code--) { // valid case -> short code approximate if (!broken_offset && !short_code--) { // valid case -> short code approximate
break; break;
} }
auto delta = (uint32_t)1 << (32 - broken_offset * 2); // delta to next possible range auto delta = (uint32_t)1 << (32 - broken_offset * 2); // delta to next possible range
auto next_min = (basic_ranges[index] & ~(delta - 1)) + delta; auto next_min = (ranges[index] & ~(delta - 1)) + delta;
while (basic_ranges[++index] < next_min); // located next range while (ranges[++index] < next_min); // located next range
--index; --index;
} }
return (uint64_t)head << 32 | basic_ranges[index]; return (uint64_t)head << 32 | ranges[index];
} }

27
src/core/short_code/internal/short_code.inl

@ -39,10 +39,19 @@ inline bool ShortCode::check(const uint32_t short_code) {
} }
inline void ShortCode::speed_up(const bool fast_mode) { inline void ShortCode::speed_up(const bool fast_mode) {
// TODO: keep the stage change one-way.
if (fast_mode) { if (fast_mode) {
cases::AllCases::instance().build(); // cases::AllCases::instance().build();
std::lock_guard guard {busy_};
cases_ = &cases::AllCases::instance().fetch();
stage_ = Stage::FAST;
} else { } else {
cases::BasicRanges::instance().build(); std::lock_guard guard {busy_};
// TODO: skip if stage_ is FAST
ranges_ = &cases::BasicRanges::instance().fetch();
stage_ = Stage::TINY;
} }
} }
@ -65,10 +74,20 @@ inline std::string ShortCode::to_string() const {
inline CommonCode ShortCode::to_common_code() const { inline CommonCode ShortCode::to_common_code() const {
// TODO: test the effect of CPU branch prediction. // TODO: test the effect of CPU branch prediction.
if (cases::AllCases::instance().is_available()) { // if (cases::AllCases::instance().is_available()) {
// return CommonCode::unsafe_create(fast_decode(code_));
// }
// return CommonCode::unsafe_create(tiny_decode(code_));
switch (stage_) {
case Stage::UNINIT:
// TODO: do speed up
// speed_up(false); // FIXME: about 3% slower
case Stage::TINY:
return CommonCode::unsafe_create(tiny_decode(code_));
case Stage::FAST:
return CommonCode::unsafe_create(fast_decode(code_)); return CommonCode::unsafe_create(fast_decode(code_));
} }
return CommonCode::unsafe_create(tiny_decode(code_));
} }
// ----------------------------------------------------------------------------------------- // // ----------------------------------------------------------------------------------------- //

11
src/core/short_code/short_code.h

@ -157,7 +157,7 @@ private:
// ------------------------------------------------------------------------------------- // // ------------------------------------------------------------------------------------- //
/// Convert ShortCode to CommonCode based on AllCases data. /// Convert ShortCode to CommonCode based on AllCases data.
static uint64_t fast_decode(uint32_t short_code); static KLSK_INLINE uint64_t fast_decode(uint32_t short_code);
/// Convert CommonCode to ShortCode based on AllCases data. /// Convert CommonCode to ShortCode based on AllCases data.
static uint32_t fast_encode(uint64_t common_code); static uint32_t fast_encode(uint64_t common_code);
@ -177,6 +177,15 @@ private:
static std::optional<uint32_t> string_decode(std::string_view short_code); static std::optional<uint32_t> string_decode(std::string_view short_code);
// ------------------------------------------------------------------------------------- // // ------------------------------------------------------------------------------------- //
/// Which decoder `to_common_code()` dispatches to; set by `speed_up()`.
enum class Stage { UNINIT, TINY, FAST };

/// Current stage, UNINIT until `speed_up()` is called.
/// NOTE(review): `to_common_code()` reads this without holding `busy_` — confirm
/// whether `speed_up()` may run concurrently with conversions.
static inline Stage stage_ = Stage::UNINIT;

/// Locked by `speed_up()` while it updates `stage_` and the data pointers.
static std::mutex busy_;

/// All cases data dereferenced by `fast_decode()`.
static const cases::RangesUnion *cases_;

/// Basic ranges data dereferenced by `tiny_decode()`.
static const cases::Ranges *ranges_;
}; };
} // namespace klotski::codec } // namespace klotski::codec

Loading…
Cancel
Save