From 5b979e8d93f24016230563b23f0907ffa582b563 Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sun, 19 May 2024 15:03:46 +0800 Subject: [PATCH] perf: get `type_id` from CommonCode more faster --- src/core/CMakeLists.txt | 4 + src/core/benchmark/group.cc | 66 +++++++++++ src/core/group/group.h | 3 + src/core/group/internal/group.cc | 136 ++++++++++++++++++++++ src/core/main.cc | 32 +++++- src/core/utils/utility.h | 2 + src/core_test/CMakeLists.txt | 32 +++++- src/core_test/core/core.cc | 51 +++++++++ src/core_test/group_tmp/group_union.cc | 149 +++++++++++++++++++++++++ 9 files changed, 468 insertions(+), 7 deletions(-) create mode 100644 src/core/benchmark/group.cc create mode 100644 src/core_test/core/core.cc create mode 100644 src/core_test/group_tmp/group_union.cc diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index ec121c1..bee586b 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -37,3 +37,7 @@ target_link_libraries(klotski_core_bin PRIVATE klotski_core) add_executable(codec_benchmark benchmark/codec.cc) target_compile_options(codec_benchmark PRIVATE -fno-rtti -fno-exceptions) target_link_libraries(codec_benchmark PRIVATE klotski::core benchmark::benchmark_main) + +add_executable(group_benchmark benchmark/group.cc) +target_compile_options(group_benchmark PRIVATE -fno-rtti -fno-exceptions) +target_link_libraries(group_benchmark PRIVATE klotski::core benchmark::benchmark_main) diff --git a/src/core/benchmark/group.cc b/src/core/benchmark/group.cc new file mode 100644 index 0000000..c5fe154 --- /dev/null +++ b/src/core/benchmark/group.cc @@ -0,0 +1,66 @@ +#include + +#include +#include + +#include "all_cases/all_cases.h" + +using klotski::cases::AllCases; + +/// Build all valid CommonCodes. 
+static std::vector<uint64_t> all_common_codes() {
+    std::vector<uint64_t> codes;
+    // A CommonCode is the head (0..15) placed above the 32-bit range.
+    for (uint64_t head = 0; head < 16; ++head) {
+        for (const auto range : AllCases::instance().fetch()[head]) {
+            codes.emplace_back(head << 32 | range);
+        }
+    }
+    std::cout << "do cal complete" << std::endl;
+    return codes;
+}
+
+/// Pick `num` evenly spaced samples from the list of all valid CommonCodes.
+///
+/// The list is built once on the first call and reused afterwards. It is cut
+/// into `num` equal parts and the middle element of each part is returned.
+/// Returns an empty vector when `num` is zero or the list is empty.
+std::vector<uint64_t> common_code_samples(uint64_t num) {
+    static const auto codes = all_common_codes();
+
+    if (num == 0 || codes.empty()) {
+        return {}; // also avoids the division by zero below
+    }
+
+    // Integer division: a tail that does not fill a whole part is never sampled.
+    const uint64_t part_size = codes.size() / num;
+    const uint64_t offset = part_size / 2;
+
+    std::vector<uint64_t> result;
+    result.reserve(num);
+    for (uint64_t i = 0; i < num; ++i) {
+        result.emplace_back(codes[i * part_size + offset]);
+    }
+
+    return result;
+}
+
+/// Benchmark `common_code_to_type_id` over `state.range(0)` sampled CommonCodes.
+static void CommonCodeToTypeId(benchmark::State &state) {
+    const auto samples = common_code_samples(static_cast<uint64_t>(state.range(0)));
+
+    for (auto _ : state) {
+        for (const auto code : samples) {
+            // DoNotOptimize keeps the result alive so the call is not elided.
+            benchmark::DoNotOptimize(klotski::cases::common_code_to_type_id(code));
+        }
+    }
+
+    // Count one item per converted code, not per outer iteration.
+    state.SetItemsProcessed(state.iterations() * state.range(0));
+}
+
+// Sample sizes: 8, 64 and 256 codes per iteration.
+BENCHMARK(CommonCodeToTypeId)->Arg(8)->Arg(64)->Arg(256);
+
+BENCHMARK_MAIN();
diff --git a/src/core/group/group.h b/src/core/group/group.h
index e4cedc4..f900b89 100644
--- a/src/core/group/group.h
+++ b/src/core/group/group.h
@@ -75,6 +75,9 @@ namespace klotski::cases {
 constexpr uint32_t TYPE_ID_LIMIT = 203;
 constexpr uint32_t ALL_GROUP_NUM = 25422;
 
+uint32_t common_code_to_type_id(uint64_t common_code);
+uint32_t raw_code_to_type_id(uint64_t raw_code);
+
 class Group;
 
 // TODO: add constexpr
diff --git a/src/core/group/internal/group.cc b/src/core/group/internal/group.cc
index b87200a..44b6deb 100644
--- a/src/core/group/internal/group.cc
+++ b/src/core/group/internal/group.cc
@@ -1,5 +1,7 @@
 #include "group/group.h"
 
+#include
+
 /// 1. n_1x1 + (n_1x2 + n_2x1) * 2 <= 14
 /// 2.
(n_1x1 != 0) && (n_2x1 != 7)
 struct block_num_t {
@@ -15,3 +17,122 @@ struct block_num_t {
 
 // TODO: convert from block_num -> type_id
 // static int type_id(block_num_t block_num);
+
+using klotski::range_reverse;
+
+const uint16_t TYPE_ID_INDEX[203] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 256,
+    257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 272, 273, 274, 275,
+    276, 277, 278, 279, 280, 281, 282, 283, 284, 512, 513, 514, 515, 516, 517, 518,
+    519, 520, 521, 522, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 544,
+    545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 768, 769, 770, 771, 772, 773,
+    774, 775, 776, 784, 785, 786, 787, 788, 789, 790, 791, 792, 800, 801, 802, 803,
+    804, 805, 806, 807, 808, 816, 817, 818, 819, 820, 821, 822, 823, 824, 1024, 1025,
+    1026, 1027, 1028, 1029, 1030, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1056, 1057, 1058, 1059,
+    1060, 1061, 1062, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1088, 1089, 1090, 1091, 1092, 1093,
+    1094, 1280, 1281, 1282, 1283, 1284, 1296, 1297, 1298, 1299, 1300, 1312, 1313, 1314, 1315, 1316,
+    1328, 1329, 1330, 1331, 1332, 1344, 1345, 1346, 1347, 1348, 1360, 1361, 1362, 1363, 1364, 1536,
+    1537, 1538, 1552, 1553, 1554, 1568, 1569, 1570, 1584, 1585, 1586, 1600, 1601, 1602, 1616, 1617,
+    1618, 1632, 1633, 1634, 1792, 1808, 1824, 1840, 1856, 1872, 1888,
+};
+
+using klotski::cases::TYPE_ID_LIMIT;
+
+/// Map block counts to a type_id by binary search in the sorted TYPE_ID_INDEX table.
+static uint32_t cal_type_id(const block_num_t &block_num) noexcept {
+    /// flag -> ... 0000 0xxx 0xxx xxxx
+    ///                  n_x2x n_2x1 n_1x1
+    const auto n_x2x = block_num.n_1x2 + block_num.n_2x1;
+    const auto flag = (n_x2x << 8) | (block_num.n_2x1 << 4) | block_num.n_1x1;
+    const auto *pos = std::lower_bound(TYPE_ID_INDEX, TYPE_ID_INDEX + TYPE_ID_LIMIT, flag);
+    return static_cast<uint32_t>(pos - TYPE_ID_INDEX);
+}
+
+/// Count the blocks of a CommonCode by walking the 2-bit fields of its range.
+/// Not called at the moment: `common_code_to_type_id` goes through `common_code_pro`.
+[[maybe_unused]] static block_num_t cal_common_block_num(const uint64_t common_code) noexcept {
+    block_num_t result;
+    auto range = range_reverse(static_cast<uint32_t>(common_code));
+    for (; range; range >>= 2) {
+        switch (range & 0b11) {
+            case 0b01: /// 1x2 block
+                ++result.n_1x2;
+                continue;
+            case 0b10: /// 2x1 block
+                ++result.n_2x1;
+                continue;
+            case 0b11: /// 1x1 block
+                ++result.n_1x1;
+                continue;
+        }
+    }
+    return result;
+}
+
+/// Count the blocks of a RawCode by walking its twenty 3-bit cells.
+static block_num_t cal_raw_block_num(const uint64_t raw_code) noexcept {
+    block_num_t result;
+    auto tmp = raw_code;
+    for (int addr = 0; addr < 20; ++addr, tmp >>= 3) {
+        switch (tmp & 0b111) {
+            case BLOCK_1x1:
+                ++result.n_1x1;
+                continue;
+            case BLOCK_1x2:
+                ++result.n_1x2;
+                continue;
+            case BLOCK_2x1:
+                ++result.n_2x1;
+                continue;
+        }
+    }
+    return result;
+}
+
+/// Closed-form type_id from the three block counts, without a table search.
+///
+/// type_ids are ordered by n = n_1x2 + n_2x1, then by n_2x1, then by n_1x1.
+/// Per `n`, n_2x1 takes n + 1 values and n_1x1 takes 15 - 2n values:
+///
+///   n = 0 | n_2x1 ~ 1 | n_1x1 ~ 15 | => 15
+///   n = 1 | n_2x1 ~ 2 | n_1x1 ~ 13 | => 26
+///   n = 2 | n_2x1 ~ 3 | n_1x1 ~ 11 | => 33
+///   n = 3 | n_2x1 ~ 4 | n_1x1 ~ 9  | => 36
+///   n = 4 | n_2x1 ~ 5 | n_1x1 ~ 7  | => 35
+///   n = 5 | n_2x1 ~ 6 | n_1x1 ~ 5  | => 30
+///   n = 6 | n_2x1 ~ 7 | n_1x1 ~ 3  | => 21
+///   n = 7 | n_2x1 ~ 8 | n_1x1 ~ 1  | =>  8
+///
+/// The running sum of the last column is the start offset of each `n`.
+/// (TYPE_ID_INDEX holds only 7 entries for n = 7, n_2x1 = 0..6, hence 203 ids.)
+/// Precondition: n_1x2 + n_2x1 <= 7, otherwise `offset_tab` is read out of bounds.
+static uint32_t my_type_id(uint32_t n_1x2, uint32_t n_2x1, uint32_t n_1x1) noexcept {
+    static constexpr uint32_t offset_tab[8] = {0, 15, 41, 74, 110, 145, 175, 196};
+
+    const uint32_t n = n_1x2 + n_2x1;
+    const uint32_t span = 15 - n * 2; // how many n_1x1 values exist for this `n`
+    return offset_tab[n] + span * n_2x1 + n_1x1;
+}
+
+/// type_id of a CommonCode, computed from its 32-bit range with three popcounts.
+static uint32_t common_code_pro(uint64_t common_code) noexcept {
+    const auto range = static_cast<uint32_t>(common_code);
+
+    /// Each mask keeps the low bit of every 2-bit field that equals the pattern.
+    const uint32_t k_01 = (~range >> 1) & range & 0x55555555;
+    const uint32_t k_10 = (range >> 1) & ~range & 0x55555555;
+    const uint32_t k_11 = (range >> 1) & range & 0x55555555;
+
+    const auto n_01 = static_cast<uint32_t>(std::popcount(k_01)); // 1x2 blocks
+    const auto n_10 = static_cast<uint32_t>(std::popcount(k_10)); // 2x1 blocks
+    const auto n_11 = static_cast<uint32_t>(std::popcount(k_11)); // 1x1 blocks
+    return my_type_id(n_01, n_10, n_11);
+}
+
+uint32_t klotski::cases::common_code_to_type_id(uint64_t common_code) {
+    return common_code_pro(common_code);
+}
+
+uint32_t klotski::cases::raw_code_to_type_id(uint64_t raw_code) {
+    return cal_type_id(cal_raw_block_num(raw_code));
+}
diff
--git a/src/core/main.cc b/src/core/main.cc index 915013c..e94d2d7 100644 --- a/src/core/main.cc +++ b/src/core/main.cc @@ -25,7 +25,37 @@ using klotski::codec::SHORT_CODE_LIMIT; int main() { const auto start = clock(); - std::cout << std::format("{:09X}", 0x1A9BF0C00) << std::endl; + // std::cout << klotski::cases::common_code_to_type_id(0x1A9BF0C00) << std::endl; + + // uint32_t demo = 0b101001110110; + // 010110001001 + // 01001000100 + + // 00 -> 1 | 0 -> 0 + // 01 -> 1 | 1 -> 1 + // 10 -> 0 | 0 -> 0 + // 11 -> 0 | 1 -> 0 + + // uint32_t ret = ((~demo >> 1) & demo) & 0x55555555; + // + // std::cout << ret << std::endl; + // std::cout << std::popcount(ret) << std::endl; + + uint32_t range = 0xA9BF0C00; // n_01 = 1 / n_10 = 4 / n_11 = 4 + + // 10 10 10 01 10 11 11 11 00 00 11 00 00 00 00 00 + // 0 0 0 1 0 0 0 0 1 1 0 1 1 1 1 1 + // 0 0 0 1 0 1 1 1 0 0 1 0 0 0 0 0 + // 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 + + uint32_t k_01 = (~range >> 1) & range & 0x55555555; + uint32_t k_10 = (range >> 1) & ~range & 0x55555555; + uint32_t k_11 = (range >> 1) & range & 0x55555555; + + // std::cout << k_01 << std::endl; + std::cout << std::popcount(k_01) << std::endl; + std::cout << std::popcount(k_10) << std::endl; + std::cout << std::popcount(k_11) << std::endl; // auto kk = GroupUnion::create(123).value(); // std::cout << kk.size() << std::endl; diff --git a/src/core/utils/utility.h b/src/core/utils/utility.h index d293298..edb2cb9 100644 --- a/src/core/utils/utility.h +++ b/src/core/utils/utility.h @@ -16,6 +16,8 @@ return ins; \ } +#define KLSK_INLINE __attribute__((always_inline)) + namespace klotski { /// Get the number of consecutive `0` in the low bits. 
diff --git a/src/core_test/CMakeLists.txt b/src/core_test/CMakeLists.txt index c8482e5..dbe6ea6 100644 --- a/src/core_test/CMakeLists.txt +++ b/src/core_test/CMakeLists.txt @@ -19,36 +19,56 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../core/common_code) # ------------------------------------------------------------------------------------ # -set(KLOTSKI_TEST_CASES_SRC +set(KLSK_TEST_CASES_SRC cases/all_cases.cc cases/basic_ranges.cc ) -add_executable(test_klotski_cases ${KLOTSKI_TEST_CASES_SRC}) +add_executable(test_klotski_cases ${KLSK_TEST_CASES_SRC}) target_link_libraries(test_klotski_cases PRIVATE ${KLSK_TEST_DEPS}) add_test(NAME klotski_cases COMMAND test_klotski_cases) # ------------------------------------------------------------------------------------ # -set(KLOTSKI_TEST_FFI_SRC +set(KLSK_TEST_FFI_SRC ffi/all_cases.cc ) -add_executable(test_klotski_ffi ${KLOTSKI_TEST_FFI_SRC}) +add_executable(test_klotski_ffi ${KLSK_TEST_FFI_SRC}) target_link_libraries(test_klotski_ffi PRIVATE ${KLSK_TEST_DEPS}) add_test(NAME klotski_ffi COMMAND test_klotski_ffi) # ------------------------------------------------------------------------------------ # -set(KLOTSKI_TEST_CODEC_SRC +set(KLSK_TEST_CODEC_SRC codec/mirror.cc codec/raw_code.cc codec/short_code.cc codec/common_code.cc ) -add_executable(test_klotski_codec ${KLOTSKI_TEST_CODEC_SRC}) +add_executable(test_klotski_codec ${KLSK_TEST_CODEC_SRC}) target_link_libraries(test_klotski_codec PRIVATE ${KLSK_TEST_DEPS}) add_test(NAME klotski_codec COMMAND test_klotski_codec) # ------------------------------------------------------------------------------------ # + +set(KLSK_TEST_CORE_SRC + core/core.cc +) + +add_executable(test_klotski_core ${KLSK_TEST_CORE_SRC}) +target_link_libraries(test_klotski_core PRIVATE ${KLSK_TEST_DEPS}) +add_test(NAME klotski_core COMMAND test_klotski_core) + +# ------------------------------------------------------------------------------------ # + +set(KLSK_TEST_GROUP_TMP_SRC + 
group_tmp/group_union.cc +) + +add_executable(test_klotski_group_tmp ${KLSK_TEST_GROUP_TMP_SRC}) +target_link_libraries(test_klotski_group_tmp PRIVATE ${KLSK_TEST_DEPS}) +add_test(NAME klotski_group_tmp COMMAND test_klotski_group_tmp) + +# ------------------------------------------------------------------------------------ # diff --git a/src/core_test/core/core.cc b/src/core_test/core/core.cc new file mode 100644 index 0000000..db0052a --- /dev/null +++ b/src/core_test/core/core.cc @@ -0,0 +1,51 @@ +#include + +#include "core/core.h" + +#include + +#include "all_cases/all_cases.h" +#include "common_code/common_code.h" + +using klotski::core::Core; +using klotski::cases::AllCases; +using klotski::codec::CommonCode; + +// mask test + +TEST(core, core) { + + std::vector raw_codes; + raw_codes.reserve(klotski::cases::ALL_CASES_NUM_); + + // std::unordered_set codes; + // codes.reserve(klotski::cases::ALL_CASES_NUM_); + + for (uint64_t head = 0; head < 16; ++head) { + for (const auto range : AllCases::instance().fetch()[head]) { + auto common_code = CommonCode::unsafe_create(head << 32 | range); + auto raw_code = common_code.to_raw_code().unwrap(); + + raw_codes.emplace_back(raw_code); + // codes.emplace(raw_code); + } + } + + // auto core = Core([&codes](uint64_t ret, uint64_t) { + // EXPECT_EQ(codes.count(ret), 1); + // }); + + std::vector codes; + codes.reserve(402258220); + + auto core = Core([&codes](uint64_t ret, uint64_t) { + codes.emplace_back(klotski::codec::RawCode::unsafe_create(ret).to_common_code().unwrap()); + }); + + for (auto raw_code : raw_codes) { + core.next_cases(raw_code, 0); + } + + // std::cout << codes.size() << std::endl; + +} diff --git a/src/core_test/group_tmp/group_union.cc b/src/core_test/group_tmp/group_union.cc new file mode 100644 index 0000000..099e15a --- /dev/null +++ b/src/core_test/group_tmp/group_union.cc @@ -0,0 +1,149 @@ +#include +#include + +#include + +#include + +#include "group/group.h" +#include "all_cases/all_cases.h" + 
+using klotski::range_reverse;
+
+using klotski::cases::AllCases;
+using klotski::codec::CommonCode;
+
+using klotski::cases::ALL_CASES_NUM;
+
+/// Test-local helpers. group.cc also defines `block_num_t` and `TYPE_ID_INDEX`
+/// at global scope, so the copies here live in an unnamed namespace.
+namespace {
+
+struct block_num_t {
+    uint8_t n_1x1 = 0; /// [0, 14]
+    uint8_t n_1x2 = 0; /// [0, 7]
+    uint8_t n_2x1 = 0; /// [0, 7]
+};
+
+bool operator==(block_num_t b1, block_num_t b2) {
+    return (b1.n_1x1 == b2.n_1x1) && (b1.n_1x2 == b2.n_1x2) && (b1.n_2x1 == b2.n_2x1);
+}
+
+const char BLOCK_NUM_MD5[] = "46a7b3af6d039cbe2f7eaebdd196c6a2";
+
+block_num_t common_block_num(const uint64_t common_code) noexcept {
+    block_num_t result;
+    auto range = range_reverse(static_cast<uint32_t>(common_code));
+    for (; range; range >>= 2) {
+        switch (range & 0b11) {
+            case 0b01: /// 1x2 block
+                ++result.n_1x2;
+                continue;
+            case 0b10: /// 2x1 block
+                ++result.n_2x1;
+                continue;
+            case 0b11: /// 1x1 block
+                ++result.n_1x1;
+                continue;
+        }
+    }
+    return result;
+}
+
+block_num_t raw_block_num(const uint64_t raw_code) noexcept {
+    block_num_t result;
+    auto tmp = raw_code;
+    for (int addr = 0; addr < 20; ++addr, tmp >>= 3) {
+        switch (tmp & 0b111) {
+            case BLOCK_1x1:
+                ++result.n_1x1;
+                continue;
+            case BLOCK_1x2:
+                ++result.n_1x2;
+                continue;
+            case BLOCK_2x1:
+                ++result.n_2x1;
+                continue;
+        }
+    }
+    return result;
+}
+
+const uint16_t TYPE_ID_INDEX[203] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 256,
+    257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 272, 273, 274, 275,
+    276, 277, 278, 279, 280, 281, 282, 283, 284, 512, 513, 514, 515, 516, 517, 518,
+    519, 520, 521, 522, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 544,
+    545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 768, 769, 770, 771, 772, 773,
+    774, 775, 776, 784, 785, 786, 787, 788, 789, 790, 791, 792, 800, 801, 802, 803,
+    804, 805, 806, 807, 808, 816, 817, 818, 819, 820, 821, 822, 823, 824, 1024, 1025,
+    1026, 1027, 1028, 1029, 1030, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1056, 1057, 1058, 1059,
+    1060, 1061, 1062, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1088, 1089, 1090, 1091, 1092, 1093,
+    1094, 1280, 1281, 1282, 1283, 1284, 1296, 1297, 1298, 1299, 1300, 1312, 1313, 1314, 1315, 1316,
+    1328, 1329, 1330, 1331, 1332, 1344, 1345, 1346, 1347, 1348, 1360, 1361, 1362, 1363, 1364, 1536,
+    1537, 1538, 1552, 1553, 1554, 1568, 1569, 1570, 1584, 1585, 1586, 1600, 1601, 1602, 1616, 1617,
+    1618, 1632, 1633, 1634, 1792, 1808, 1824, 1840, 1856, 1872, 1888,
+};
+
+uint32_t get_type_id(block_num_t block_num) noexcept { // block_num_t -> type_id
+    /// flag -> ... 0000 0xxx 0xxx xxxx
+    ///                  n_x2x n_2x1 n_1x1
+    const auto n_x2x = block_num.n_1x2 + block_num.n_2x1;
+    const auto flag = (n_x2x << 8) | (block_num.n_2x1 << 4) | block_num.n_1x1;
+    const auto *pos = std::lower_bound(TYPE_ID_INDEX, TYPE_ID_INDEX + klotski::cases::TYPE_ID_LIMIT, flag);
+    return static_cast<uint32_t>(pos - TYPE_ID_INDEX);
+}
+
+} // namespace
+
+TEST(Group, block_num) {
+    std::string result[16];
+    auto test = [&result](uint64_t head) {
+        char buffer[13];
+        result[head].reserve(ALL_CASES_NUM[head]); // pre-allocate the output string
+
+        for (const auto range: AllCases::instance().fetch()[head]) {
+            auto common_code = CommonCode::unsafe_create(head << 32 | range);
+
+            auto tmp = common_block_num(common_code.unwrap());
+            EXPECT_EQ(tmp, raw_block_num(common_code.to_raw_code().unwrap()));
+
+            EXPECT_LE(tmp.n_1x2 * 2 + tmp.n_2x1 * 2 + tmp.n_1x1, 14);
+            snprintf(buffer, sizeof(buffer), "%d,%d,%d\n", tmp.n_1x2 + tmp.n_2x1, tmp.n_1x1, tmp.n_2x1);
+            result[head] += buffer;
+        }
+    };
+
+    std::thread threads[16];
+    for (uint64_t head = 0; head < 16; ++head) {
+        threads[head] = std::thread(test, head); // one worker per head
+    }
+    for (auto &t : threads) { t.join(); } // wait until every string is built
+
+    std::string block_num_str;
+    for (auto &&tmp : result) {
+        block_num_str += tmp; // combine result
+    }
+    auto block_num_md5 = md5::MD5::Hash(block_num_str.c_str(), block_num_str.size());
+    EXPECT_STREQ(block_num_md5.c_str(), BLOCK_NUM_MD5); // verify md5
+}
+
+TEST(Group, common_code) {
+    for (uint64_t head = 0; head < 16; ++head) {
+        for (const auto range : AllCases::instance().fetch()[head]) {
+            const auto common_code = head << 32 | range;
+            const auto expected = get_type_id(common_block_num(common_code));
+            EXPECT_EQ(klotski::cases::common_code_to_type_id(common_code), expected);
+        }
+    }
+}
+
+TEST(Group, raw_code) {
+    for (uint64_t head = 0; head < 16; ++head) {
+        for (const auto range : AllCases::instance().fetch()[head]) {
+            const auto raw_code = CommonCode::unsafe_create(head << 32 | range).to_raw_code().unwrap();
+            const auto expected = get_type_id(raw_block_num(raw_code));
+            EXPECT_EQ(klotski::cases::raw_code_to_type_id(raw_code), expected);
+        }
+    }
+}