Browse Source

perf: ShortCode conversion speed

master
Dnomd343 5 months ago
parent
commit
cc896d6497
  1. 2
      src/core/CMakeLists.txt
  2. 36
      src/core/benchmark/codec.cc
  3. 36
      src/core/short_code/internal/convert.cc
  4. 27
      src/core/short_code/internal/short_code.inl
  5. 11
      src/core/short_code/short_code.h

2
src/core/CMakeLists.txt

@ -42,7 +42,7 @@ add_executable(klotski_core_bin main.cc)
target_link_libraries(klotski_core_bin PRIVATE klotski_core)
# Benchmark executable built from benchmark/codec.cc.
add_executable(codec_benchmark benchmark/codec.cc)
target_compile_options(codec_benchmark PRIVATE -fno-rtti -fno-exceptions)
# NOTE(review): two option lines are listed for this target. This listing looks like a
# rendered diff showing the old and the new line together; confirm which one the real
# file keeps.
# -fno-access-control (GCC/Clang) turns off C++ access checking, which lets the benchmark
# reach private members without a `#define private public` hack around the includes.
target_compile_options(codec_benchmark PRIVATE -fno-rtti -fno-exceptions -fno-access-control)
target_link_libraries(codec_benchmark PRIVATE klotski::core benchmark::benchmark_main)
add_executable(group_benchmark benchmark/group.cc)

36
src/core/benchmark/codec.cc

@ -2,11 +2,14 @@
#include <benchmark/benchmark.h>
#define private public
// #define private public
#include "group/group.h"
#include "all_cases/all_cases.h"
#include "common_code/common_code.h"
#undef private
// #undef private
using klotski::codec::ShortCode;
using klotski::codec::CommonCode;
using klotski::cases::AllCases;
using klotski::codec::CommonCode;
@ -192,13 +195,38 @@ static void ShortCodeDeserialize(benchmark::State &state) {
}
/// Benchmark: times `ShortCode::to_common_code()` for a single fixed short code.
/// NOTE(review): this listing looks like a rendered diff, so some lines may belong to
/// different revisions of the function; confirm against the real file.
static void ShortCodeToCommonCode(benchmark::State &state) {
// ShortCode::speed_up(true);
ShortCode::speed_up(false); // takes the `fast_mode == false` branch of speed_up()
// ShortCode::fast_decode(4091296);
// The short code is derived once here, outside the timed loop.
auto short_code = CommonCode::unsafe_create(0x1A9BF0C00).to_short_code();
for (auto _ : state) {
// Presumably `volatile` is used so the conversion result is not optimized away;
// benchmark::DoNotOptimize() is the library's dedicated tool for that.
volatile auto kk = short_code.to_common_code();
// if (AllCases::instance().is_available()) {
// if (ShortCode::stage_ == ShortCode::Stage::FAST) {
// volatile auto pp = ShortCode::fast_decode(4091296);
// }
// }
}
}
// BENCHMARK(CommonCodeSerialize)->Range(8, 256);
// BENCHMARK(CommonCodeDeserialize)->Range(8, 256);
// BENCHMARK(CommonCodeSerializeShorten)->Range(8, 256);
// BENCHMARK(CommonCodeDeserializeShorten)->Range(8, 256);
BENCHMARK(ShortCodeSerialize)->Range(8, 256);
BENCHMARK(ShortCodeDeserialize)->Range(8, 256);
// BENCHMARK(ShortCodeSerialize)->Range(8, 256);
// BENCHMARK(ShortCodeDeserialize)->Range(8, 256);
BENCHMARK(ShortCodeToCommonCode);
// static void CommonCodeDecode(benchmark::State &state) {
// const auto tmp = str_common_codes(state.range(0));

36
src/core/short_code/internal/convert.cc

@ -50,6 +50,15 @@ static uint32_t check_range(uint32_t head, uint32_t range) noexcept {
return 0; // pass check
}
/// Out-of-class definitions of ShortCode's static data members.
/// `busy_` is the mutex taken by speed_up() around its pointer and stage writes.
std::mutex ShortCode::busy_ {};
// ShortCode::Stage ShortCode::stage_ = Stage::UNINIT;
// const klotski::cases::RangesUnion *ShortCode::cases_ = &AllCases::instance().fetch();
/// Table read by fast_decode(); starts as nullptr and is assigned in speed_up(true).
const klotski::cases::RangesUnion *ShortCode::cases_ = nullptr;
/// Table read by tiny_decode(); starts as nullptr and is assigned in speed_up(false)
/// and at the top of tiny_decode().
const klotski::cases::Ranges *ShortCode::ranges_ = nullptr;
uint32_t ShortCode::fast_encode(uint64_t common_code) {
auto head = common_code >> 32;
auto &ranges = AllCases::instance().fetch()[head]; // match available ranges
@ -60,7 +69,8 @@ uint32_t ShortCode::fast_encode(uint64_t common_code) {
/// Decode a short code into a 64-bit common code value by table lookup.
///
/// `upper_bound(...) - 1` selects the last ALL_CASES_OFFSET entry that is not greater than
/// `short_code` (presumably the table is sorted ascending, as upper_bound requires). The
/// index of that entry is the head, stored from bit 32 upward, and `short_code - *offset`
/// is the index inside that head's list.
///
/// NOTE(review): two `return` statements are visible below, so the second one is
/// unreachable as written. This listing looks like a rendered diff showing the removed and
/// the added line together; confirm which one the real file keeps.
/// NOTE(review): `cases_` is defined as nullptr and only assigned in speed_up(true); the
/// `(*cases_)` form would dereference a null pointer if it runs before that assignment.
uint64_t ShortCode::fast_decode(uint32_t short_code) {
auto offset = std::upper_bound(ALL_CASES_OFFSET.begin(), ALL_CASES_OFFSET.end(), short_code) - 1;
uint64_t head = offset - ALL_CASES_OFFSET.begin();
return (head << 32) | AllCases::instance().fetch()[head][short_code - *offset];
// return (head << 32) | AllCases::instance().fetch()[head][short_code - *offset];
return (head << 32) | (*cases_)[head][short_code - *offset];
}
uint32_t ShortCode::tiny_encode(uint64_t common_code) {
@ -89,6 +99,13 @@ uint32_t ShortCode::tiny_encode(uint64_t common_code) {
}
uint64_t ShortCode::tiny_decode(uint32_t short_code) { // short code --> common code
// speed_up(false);
// std::lock_guard guard {busy_};
ranges_ = &cases::BasicRanges::instance().fetch();
// stage_ = Stage::TINY;
auto offset_ = std::upper_bound(ALL_CASES_OFFSET.begin(), ALL_CASES_OFFSET.end(), short_code) - 1;
auto head = offset_ - ALL_CASES_OFFSET.begin(); // head index
short_code -= *offset_;
@ -99,16 +116,21 @@ uint64_t ShortCode::tiny_decode(uint32_t short_code) { // short code --> common
/// search for target range
auto index = RANGES_GLOBAL_OFFSET[prefix];
const auto &basic_ranges = BasicRanges::instance().fetch();
for (; index < basic_ranges.size(); ++index) { // traverse basic ranges
auto broken_offset = check_range(head, range_reverse(basic_ranges[index]));
// auto basic_ranges = ranges_;
// const auto &basic_ranges = BasicRanges::instance().fetch();
const auto &ranges = *ranges_;
// const auto &ranges = BasicRanges::instance().fetch();
for (; index < ranges.size(); ++index) { // traverse basic ranges
auto broken_offset = check_range(head, range_reverse(ranges[index]));
if (!broken_offset && !short_code--) { // valid case -> short code approximate
break;
}
auto delta = (uint32_t)1 << (32 - broken_offset * 2); // delta to next possible range
auto next_min = (basic_ranges[index] & ~(delta - 1)) + delta;
while (basic_ranges[++index] < next_min); // located next range
auto next_min = (ranges[index] & ~(delta - 1)) + delta;
while (ranges[++index] < next_min); // located next range
--index;
}
return (uint64_t)head << 32 | basic_ranges[index];
return (uint64_t)head << 32 | ranges[index];
}

27
src/core/short_code/internal/short_code.inl

@ -39,10 +39,19 @@ inline bool ShortCode::check(const uint32_t short_code) {
}
/// Build the lookup data for the chosen mode and record that mode in `stage_`.
///
/// @param fast_mode  true:  build AllCases, point `cases_` at its data, set `stage_` to FAST.
///                   false: build BasicRanges, point `ranges_` at its data, set `stage_` to TINY.
///
/// The build() call runs before `busy_` is taken; only the pointer and stage writes happen
/// under the lock.
/// NOTE(review): calling with `false` after `true` sets `stage_` back to TINY, which is
/// what the TODOs below are about.
inline void ShortCode::speed_up(const bool fast_mode) {
// TODO: keep the stage change one-way.
if (fast_mode) {
cases::AllCases::instance().build();
// cases::AllCases::instance().build();
std::lock_guard guard {busy_};
cases_ = &cases::AllCases::instance().fetch();
stage_ = Stage::FAST;
} else {
cases::BasicRanges::instance().build();
std::lock_guard guard {busy_};
// TODO: skip if stage_ is FAST
ranges_ = &cases::BasicRanges::instance().fetch();
stage_ = Stage::TINY;
}
}
@ -65,10 +74,20 @@ inline std::string ShortCode::to_string() const {
/// Convert this short code to a CommonCode, picking the decoder from `stage_`:
/// FAST uses fast_decode(), TINY uses tiny_decode(), and UNINIT has no statement of its
/// own, so it falls through to the TINY case.
///
/// NOTE(review): the `if (...) {` right after the first TODO is never closed in this
/// listing. It looks like a removed line from a rendered diff; confirm against the real file.
/// NOTE(review): `stage_` is read here without taking `busy_`, while speed_up() writes it
/// under that lock; confirm this is intended.
inline CommonCode ShortCode::to_common_code() const {
// TODO: test the effect of CPU branch prediction.
if (cases::AllCases::instance().is_available()) {
// if (cases::AllCases::instance().is_available()) {
// return CommonCode::unsafe_create(fast_decode(code_));
// }
// return CommonCode::unsafe_create(tiny_decode(code_));
switch (stage_) {
case Stage::UNINIT:
// TODO: do speed up
// speed_up(false); // FIXME: about 3% slower
// No break: UNINIT falls through to the TINY case.
case Stage::TINY:
return CommonCode::unsafe_create(tiny_decode(code_));
case Stage::FAST:
return CommonCode::unsafe_create(fast_decode(code_));
}
// Reached only if `stage_` matches none of the cases above.
return CommonCode::unsafe_create(tiny_decode(code_));
}
// ----------------------------------------------------------------------------------------- //

11
src/core/short_code/short_code.h

@ -157,7 +157,7 @@ private:
// ------------------------------------------------------------------------------------- //
/// Convert ShortCode to CommonCode based on AllCases data.
static uint64_t fast_decode(uint32_t short_code);
static KLSK_INLINE uint64_t fast_decode(uint32_t short_code);
/// Convert CommonCode to ShortCode based on AllCases data.
static uint32_t fast_encode(uint64_t common_code);
@ -177,6 +177,15 @@ private:
static std::optional<uint32_t> string_decode(std::string_view short_code);
// ------------------------------------------------------------------------------------- //
enum class Stage { UNINIT, TINY, FAST };
static std::mutex busy_;
static inline auto stage_ {Stage::UNINIT};
static const cases::Ranges *ranges_;
static const cases::RangesUnion *cases_;
};
} // namespace klotski::codec

Loading…
Cancel
Save