From 3907484effaff49d52585a5cf0fbca10c09d3b3b Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sat, 18 May 2024 22:08:05 +0800 Subject: [PATCH] perf: optimize string encoding of CommonCode --- CMakeLists.txt | 2 +- src/core/CMakeLists.txt | 2 +- src/core/benchmark/codec.cc | 235 ++++++++++++++++----- src/core/common_code/common_code.h | 2 +- src/core/common_code/internal/serialize.cc | 28 +-- 5 files changed, 194 insertions(+), 75 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8de77eb..c542cfd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() add_compile_options(-Wall -Wextra) -add_compile_options(-flto=auto) # TODO: enabled by LTO option +add_compile_options(-flto=full) # TODO: enabled by LTO option get_filename_component(KLSK_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} ABSOLUTE) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f9ea8e3..ec121c1 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -26,7 +26,7 @@ set(KLOTSKI_CORE_SRC ) add_library(klotski_core STATIC ${KLOTSKI_CORE_SRC}) -target_compile_options(klotski_core PRIVATE -fno-rtti -fno-exceptions) +target_compile_options(klotski_core PRIVATE -fno-rtti -fno-exceptions) # option for `-fvisibility=hidden` target_include_directories(klotski_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) add_library(klotski::core ALIAS klotski_core) diff --git a/src/core/benchmark/codec.cc b/src/core/benchmark/codec.cc index c2be8b4..b984925 100644 --- a/src/core/benchmark/codec.cc +++ b/src/core/benchmark/codec.cc @@ -1,85 +1,210 @@ #include -#include "benchmark/benchmark.h" - -#include "all_cases/all_cases.h" +#include #define private public -#include - +#include "group/group.h" +#include "all_cases/all_cases.h" #include "common_code/common_code.h" +#undef private using klotski::cases::AllCases; +using klotski::codec::CommonCode; +/// Build all valid CommonCodes. static std::vector all_common_codes() { - std::vector all_codes; - + std::vector codes; for (uint64_t head = 0; head < 16; ++head) { for (const auto range : AllCases::instance().fetch()[head]) { - all_codes.emplace_back(head << 32 | range); + codes.emplace_back(head << 32 | range); } } + std::cout << "do cal complete" << std::endl; + return codes; +} + +std::vector common_code_samples(uint64_t num) { + + static auto codes = all_common_codes(); + + uint64_t part_size = codes.size() / num; + + // uint64_t offset = 0; + uint64_t offset = part_size / 2; + + std::vector result; + + for (uint64_t i = 0; i < num; ++i) { + uint64_t index = i * part_size + offset; + // // std::cout << "index = " << index << std::endl; + + // uint64_t kk[] {343, 666, 114514, 35324, 123454, 76453, 93411}; + // uint64_t index = kk[i % 7]; + + result.emplace_back(codes[index]); + } + + return result; - return all_codes; } -static std::vector select_codes() { - auto codes = all_common_codes(); +// std::vector select_codes(uint64_t num) { +// auto codes = all_common_codes(); +// +// // return {codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411]}; +// // std::array samples = { +// std::vector samples = { +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], +// }; +// +// return {samples.begin(), samples.begin() + num}; +// } + +std::vector str_common_codes(uint64_t num, bool shorten) { + // auto src = select_codes(num); + auto src = common_code_samples(num); + + std::vector codes; + + codes.reserve(src.size()); + for (auto x : src) { + codes.emplace_back(klotski::codec::CommonCode::unsafe_create(x).to_string(shorten)); + } + + return codes; +} + +static void CommonCodeSerialize(benchmark::State &state) { + + // common_code_samples(8); + + auto samples = common_code_samples(state.range(0)); - return {codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411]}; + for (auto _ : state) { + + for (auto code : samples) { + + volatile auto ret = klotski::codec::CommonCode::string_encode(code); + } + + } + + state.SetItemsProcessed(state.iterations() * state.range(0)); } -std::vector samples { - 0x00000303F, - 0x0000071F0, - 0x003339C40, - 0x000804340, - 0x0034C6D00, - 0x00230E0F0, - 0x002F371C0, -}; - -static void Demo(benchmark::State &state) { - - // auto samples = select_codes(); - // - // for (auto code : samples) { - // auto c = klotski::codec::CommonCode::create(code).value(); - // std::cout << c << std::endl; - // } - // std::cout << std::endl; - - // samples.clear(); - // samples = select_codes(); - // samples.emplace_back(0x00000303F); - // samples.emplace_back(0x0000071F0); - // samples.emplace_back(0x003339C40); - // samples.emplace_back(0x000804340); - // samples.emplace_back(0x0034C6D00); - // samples.emplace_back(0x00230E0F0); - // samples.emplace_back(0x002F371C0); +static void CommonCodeDeserialize(benchmark::State &state) { + const auto tmp = str_common_codes(state.range(0), false); + const std::vector samples {tmp.begin(), tmp.end()}; for (auto _ : state) { + for (const auto code : samples) { + benchmark::DoNotOptimize(CommonCode::string_decode(code)); + } + } + state.SetItemsProcessed(state.iterations() * state.range(0)); + +} + +static void CommonCodeSerializeShorten(benchmark::State &state) { + + // auto samples = select_codes(state.range(0)); + auto samples = common_code_samples(state.range(0)); + for (auto _ : state) { for (auto code : samples) { - // auto volatile holder = klotski::codec::CommonCode::string_encode(code); - // auto volatile holder_1 = code + 213; - // auto volatile holder_2 = code + 123; - // auto volatile holder_3 = code + 233; - // auto volatile holder_4 = code + 412; - // auto volatile holder_5 = code + 896; - // auto volatile holder_6 = code + 154; - // auto volatile holder_7 = code + 124; - - auto ret = klotski::codec::CommonCode::string_encode(code); - // auto volatile str_1 = ret[0]; - // auto volatile str_2 = ret[1]; - // auto volatile str_3 = ret[2]; + volatile auto ret = klotski::codec::CommonCode::string_encode_shorten(code); } + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} + +static void CommonCodeDeserializeShorten(benchmark::State &state) { + const auto tmp = str_common_codes(state.range(0), true); + const std::vector samples {tmp.begin(), tmp.end()}; + + for (auto _ : state) { + for (const auto code : samples) { + benchmark::DoNotOptimize(CommonCode::string_decode(code)); + } } + state.SetItemsProcessed(state.iterations() * state.range(0)); + } -BENCHMARK(Demo); +BENCHMARK(CommonCodeSerialize)->Range(8, 256); +BENCHMARK(CommonCodeDeserialize)->Range(8, 256); +BENCHMARK(CommonCodeSerializeShorten)->Range(8, 256); +BENCHMARK(CommonCodeDeserializeShorten)->Range(8, 256); + +// static void CommonCodeDecode(benchmark::State &state) { +// const auto tmp = str_common_codes(state.range(0)); +// const std::vector samples {tmp.begin(), tmp.end()}; +// +// for (auto _ : state) { +// for (const auto code : samples) { +// benchmark::DoNotOptimize(CommonCode::string_decode(code)); +// } +// } +// state.SetBytesProcessed(int64_t(state.iterations()) * +// int64_t(state.range(0))); +// +// state.SetComplexityN(state.range(0)); +// +// } + +// BENCHMARK(CommonCodeStrEncode)->Range(8, 256); +// BENCHMARK(CommonCodeSStrEncode)->Range(8, 256) +// ->ComputeStatistics("ratio", [](const std::vector &v) -> double { +// return (*std::begin(v)) / (*std::end(v)); +// }, benchmark::StatisticUnit::kPercentage); + +// BENCHMARK(CommonCodeSStrEncode)->Range(8, 256)->Unit(benchmark::kMillisecond); + +// BENCHMARK(CommonCodeDeserialize)->RangeMultiplier(8)->Range(1, 256); +// BENCHMARK(CommonCodeDeserialize)->Name("Demo")->RangeMultiplier(8)->Range(1, 256); +// BENCHMARK(CommonCodeDeserialize)->RangeMultiplier(2)->Range(1, 256)->Complexity(benchmark::oN); BENCHMARK_MAIN(); diff --git a/src/core/common_code/common_code.h b/src/core/common_code/common_code.h index 46afcca..3815fcb 100644 --- a/src/core/common_code/common_code.h +++ b/src/core/common_code/common_code.h @@ -161,7 +161,7 @@ private: static std::string string_encode_shorten(uint64_t common_code); /// Deserialize CommonCode from string and return nullopt on error. - static std::optional string_decode(const std::string &common_code); + static std::optional string_decode(std::string_view common_code); // ------------------------------------------------------------------------------------- // }; diff --git a/src/core/common_code/internal/serialize.cc b/src/core/common_code/internal/serialize.cc index e6cd562..ab53641 100644 --- a/src/core/common_code/internal/serialize.cc +++ b/src/core/common_code/internal/serialize.cc @@ -4,10 +4,11 @@ namespace klotski::codec { /// Convert a single hexadecimal digit to a character. static char to_hex_char(const uint64_t hex_bit) { + [[assume(hex_bit < 0x10)]]; if (hex_bit < 0xA) { - return char(hex_bit + '0'); + return static_cast(hex_bit + '0'); } - return char(hex_bit + 'A' - 10); + return static_cast(hex_bit + 'A' - 10); } std::string CommonCode::string_encode(uint64_t common_code) { @@ -19,30 +20,23 @@ std::string CommonCode::string_encode(uint64_t common_code) { return std::string{code_str, code_str + 9}; } -std::string CommonCode::string_encode_shorten(uint64_t common_code) { +std::string CommonCode::string_encode_shorten(const uint64_t common_code) { if (common_code == 0) { return "0"; // special case } - int zero_start; - char code_str[9]; - for (int i = 0; i < 9; ++i) { - auto hex_bit = common_code >> (32 - i * 4) & 0b1111; - code_str[i] = to_hex_char(hex_bit); - if (hex_bit != 0) { - zero_start = 9; - } else if (zero_start == 9) { - zero_start = i; - } - } - return std::string{code_str, code_str + zero_start}; + auto code = string_encode(common_code); + code.resize(9 - (std::countr_zero(common_code) >> 2)); + return code; } -std::optional CommonCode::string_decode(const std::string &common_code) { +// TODO: direct table lookup can bring about a 30% improvement, +// TODO: but it is necessary to confirm the performance of different CPU caches. +std::optional CommonCode::string_decode(const std::string_view common_code) { if (common_code.length() > 9 || common_code.empty()) { return std::nullopt; // invalid string length } uint64_t result = 0; - for (auto hex_bit : common_code) { + for (const auto hex_bit : common_code) { if (hex_bit >= '0' && hex_bit <= '9') { // 0 ~ 9 (result <<= 4) |= (hex_bit - '0'); } else if (hex_bit >= 'A' && hex_bit <= 'F') { // A ~ F