Browse Source

perf: get `type_id` from CommonCode much faster

Dnomd343 9 months ago
  1. 4
  2. 66
  3. 3
  4. 136
  5. 32
  6. 2
  7. 32
  8. 51
  9. 149


@ -37,3 +37,7 @@ target_link_libraries(klotski_core_bin PRIVATE klotski_core)
# Codec benchmark: compiled without RTTI/exceptions and linked against
# klotski::core plus the Google Benchmark main entry point.
# NOTE(review): the source path after `benchmark/` is truncated in this view.
add_executable(codec_benchmark benchmark/
target_compile_options(codec_benchmark PRIVATE -fno-rtti -fno-exceptions)
target_link_libraries(codec_benchmark PRIVATE klotski::core benchmark::benchmark_main)
# Group benchmark: same compile options and link dependencies as codec_benchmark.
# NOTE(review): the source path after `benchmark/` is truncated in this view.
add_executable(group_benchmark benchmark/
target_compile_options(group_benchmark PRIVATE -fno-rtti -fno-exceptions)
target_link_libraries(group_benchmark PRIVATE klotski::core benchmark::benchmark_main)


@ -0,0 +1,66 @@
#include <iostream>
#include <benchmark/benchmark.h>
#include <group/group.h>
#include "all_cases/all_cases.h"
using klotski::cases::AllCases;
/// Build the list of all CommonCodes.
///
/// For every `head` in [0, 16) each range reported by
/// `AllCases::instance().fetch()[head]` is packed as `head << 32 | range`
/// and appended to the result.
///
/// @return vector holding the packed 64-bit codes.
///
/// NOTE(review): the closing braces are not visible in this view; presumably
/// the progress message is printed once, after both loops — confirm.
static std::vector<uint64_t> all_common_codes() {
std::vector<uint64_t> codes;
for (uint64_t head = 0; head < 16; ++head) {
for (const auto range : AllCases::instance().fetch()[head]) {
codes.emplace_back(head << 32 | range);
std::cout << "do cal complete" << std::endl;
return codes;
/// Pick `num` sample indices spread evenly over the full CommonCode list.
///
/// The list is built once and cached in a function-local static. It is split
/// into `num` parts of `codes.size() / num` entries, and the index used for
/// part `i` is `i * part_size + part_size / 2` (the middle of that part).
///
/// @param num number of samples requested.
/// @return the `result` vector.
///
/// NOTE(review): the statement that fills `result` is not visible in this
/// view; presumably `codes[index]` is appended for each `i` — confirm.
/// NOTE(review): `num == 0` divides by zero when computing `part_size`.
std::vector<uint64_t> common_code_samples(uint64_t num) {
static auto codes = all_common_codes();
uint64_t part_size = codes.size() / num;
// uint64_t offset = 0;
uint64_t offset = part_size / 2;
std::vector<uint64_t> result;
for (uint64_t i = 0; i < num; ++i) {
uint64_t index = i * part_size + offset;
// // std::cout << "index = " << index << std::endl;
// uint64_t kk[] {343, 666, 114514, 35324, 123454, 76453, 93411};
// uint64_t index = kk[i % 7];
return result;
/// Benchmark body for `klotski::cases::common_code_to_type_id`.
///
/// `state.range(0)` selects how many sample codes are used. Every benchmark
/// iteration converts each sample, and the reported item count is
/// `iterations * state.range(0)`.
static void CommonCodeToTypeId(benchmark::State &state) {
auto samples = common_code_samples(state.range(0));
for (auto _ : state) {
for (auto code : samples) {
// The result is written to a volatile local so the call has an observable effect.
volatile auto ret = klotski::cases::common_code_to_type_id(code);
state.SetItemsProcessed(state.iterations() * state.range(0));


@ -75,6 +75,9 @@ namespace klotski::cases {
/// Number of entries in the type_id lookup table.
constexpr uint32_t TYPE_ID_LIMIT = 203;
/// NOTE(review): presumably the total number of groups over all type_ids — confirm.
constexpr uint32_t ALL_GROUP_NUM = 25422;
/// Get the type_id of a layout given as a 64-bit CommonCode value.
uint32_t common_code_to_type_id(uint64_t common_code);
/// Get the type_id of a layout given as a 64-bit RawCode value.
uint32_t raw_code_to_type_id(uint64_t raw_code);
/// Forward declaration.
class Group;
// TODO: add constexpr


@ -1,5 +1,7 @@
#include "group/group.h"
#include <utils/common.h>
/// 1. n_1x1 + (n_1x2 + n_2x1) * 2 <= 14
/// 2. (n_1x1 != 0) && (n_2x1 != 7)
struct block_num_t {
@ -15,3 +17,137 @@ struct block_num_t {
// TODO: convert from block_num -> type_id
// static int type_id(block_num_t block_num);
using klotski::range_reverse;
/// Ascending table of block-count flags; the position of a flag in this table
/// is its type_id. Each flag is `(n_1x2 + n_2x1) << 8 | n_2x1 << 4 | n_1x1`,
/// the same value that cal_type_id() builds before searching the table.
const uint16_t TYPE_ID_INDEX[203] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 256,
257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 272, 273, 274, 275,
276, 277, 278, 279, 280, 281, 282, 283, 284, 512, 513, 514, 515, 516, 517, 518,
519, 520, 521, 522, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 544,
545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 768, 769, 770, 771, 772, 773,
774, 775, 776, 784, 785, 786, 787, 788, 789, 790, 791, 792, 800, 801, 802, 803,
804, 805, 806, 807, 808, 816, 817, 818, 819, 820, 821, 822, 823, 824, 1024, 1025,
1026, 1027, 1028, 1029, 1030, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1056, 1057, 1058, 1059,
1060, 1061, 1062, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1088, 1089, 1090, 1091, 1092, 1093,
1094, 1280, 1281, 1282, 1283, 1284, 1296, 1297, 1298, 1299, 1300, 1312, 1313, 1314, 1315, 1316,
1328, 1329, 1330, 1331, 1332, 1344, 1345, 1346, 1347, 1348, 1360, 1361, 1362, 1363, 1364, 1536,
1537, 1538, 1552, 1553, 1554, 1568, 1569, 1570, 1584, 1585, 1586, 1600, 1601, 1602, 1616, 1617,
1618, 1632, 1633, 1634, 1792, 1808, 1824, 1840, 1856, 1872, 1888,
using klotski::cases::TYPE_ID_LIMIT;
/// Convert block counts into a type_id by binary search over TYPE_ID_INDEX.
///
/// The search key packs the counts as:
///   ... 0000 0xxx 0xxx xxxx
///            n_x2x n_2x1 n_1x1
/// where n_x2x = n_1x2 + n_2x1.
///
/// @param block_num block counts of the layout.
/// @return position of the first table entry that is not less than the key.
uint32_t cal_type_id(block_num_t &&block_num) noexcept {
    const auto pair_total = block_num.n_1x2 + block_num.n_2x1;
    const auto key = (pair_total << 8) | (block_num.n_2x1 << 4) | block_num.n_1x1;

    const auto *first = TYPE_ID_INDEX;
    const auto *last = TYPE_ID_INDEX + TYPE_ID_LIMIT;
    return std::lower_bound(first, last, key) - first;
}
/// Count the blocks described by the low 32 bits (the range) of a CommonCode.
///
/// The range is passed through `range_reverse` and then consumed two bits at
/// a time from the low end until no set bits remain.
///
/// NOTE(review): the case bodies are not visible in this view; presumably each
/// case increments the matching counter of `result` — confirm.
block_num_t cal_common_block_num(const uint64_t common_code) noexcept {
block_num_t result;
auto range = range_reverse(static_cast<uint32_t>(common_code));
for (; range; range >>= 2) {
switch (range & 0b11) {
case 0b01: /// 1x2 block
case 0b10: /// 2x1 block
case 0b11: /// 1x1 block
return result;
/// Count the blocks described by a RawCode.
///
/// The code is read as 20 consecutive 3-bit cells, starting from the low bits.
///
/// NOTE(review): the case bodies are not visible in this view; presumably each
/// case increments the matching counter of `result` — confirm.
block_num_t cal_raw_block_num(const uint64_t raw_code) noexcept {
block_num_t result;
auto tmp = raw_code;
for (int addr = 0; addr < 20; ++addr, tmp >>= 3) {
switch (tmp & 0b111) {
case BLOCK_1x1:
case BLOCK_1x2:
case BLOCK_2x1:
return result;
/// Map block counts to a type_id arithmetically, without searching a table.
///
/// type_ids are numbered in ascending order of n = n_1x2 + n_2x1, then of
/// n_2x1, then of n_1x1. For a given n there are (n + 1) slots for n_2x1 and
/// (15 - 2n) slots for n_1x1:
///
///   n = 0 | n_2x1 ~ 1 | n_1x1 ~ 15 | => 15
///   n = 1 | n_2x1 ~ 2 | n_1x1 ~ 13 | => 26
///   n = 2 | n_2x1 ~ 3 | n_1x1 ~ 11 | => 33
///   n = 3 | n_2x1 ~ 4 | n_1x1 ~  9 | => 36
///   n = 4 | n_2x1 ~ 5 | n_1x1 ~  7 | => 35
///   n = 5 | n_2x1 ~ 6 | n_1x1 ~  5 | => 30
///   n = 6 | n_2x1 ~ 7 | n_1x1 ~  3 | => 21
///   n = 7 | n_2x1 ~ 8 | n_1x1 ~  1 | => 8
///
/// @param n_1x2 number of 1x2 blocks.
/// @param n_2x1 number of 2x1 blocks.
/// @param n_1x1 number of 1x1 blocks.
/// @return offset_tab[n] + (15 - 2n) * n_2x1 + n_1x1.
/// @note n_1x2 + n_2x1 must not exceed 7: the sum indexes an 8-entry table
///       and is not range-checked.
uint32_t my_type_id(uint32_t n_1x2, uint32_t n_2x1, uint32_t n_1x1) noexcept {
    /// offset_tab[n] is the running total of the per-n slot counts listed above.
    constexpr uint32_t offset_tab[8] = {0, 15, 41, 74, 110, 145, 175, 196};

    const uint32_t n = n_1x2 + n_2x1;
    const uint32_t span = 15 - n * 2; // number of n_1x1 slots for this n
    return offset_tab[n] + span * n_2x1 + n_1x1;
}
/// Compute the type_id of a CommonCode by counting 2-bit patterns with popcount.
///
/// Only the low 32 bits (the range) are inspected. They are treated as sixteen
/// 2-bit fields; after masking, one bit per field remains set when the field
/// holds the pattern being counted:
///   01 -> counted as n_1x2, 10 -> counted as n_2x1, 11 -> counted as n_1x1
/// The three counts are then handed to my_type_id().
///
/// @param common_code 64-bit CommonCode value.
/// @return the type_id computed by my_type_id() from the three counts.
uint32_t common_code_pro(uint64_t common_code) {
    constexpr uint32_t kLowBits = 0x55555555; // low bit of every 2-bit field
    const auto range = static_cast<uint32_t>(common_code);

    // After `>> 1` the high bit of each field lines up with that field's low bit.
    const uint32_t k_01 = (~range >> 1) & range & kLowBits;
    const uint32_t k_10 = (range >> 1) & ~range & kLowBits;
    const uint32_t k_11 = (range >> 1) & range & kLowBits;

    const auto n_01 = std::popcount(k_01);
    const auto n_10 = std::popcount(k_10);
    const auto n_11 = std::popcount(k_11);
    return my_type_id(n_01, n_10, n_11);
}
/// Get the type_id of a CommonCode; delegates to the popcount-based common_code_pro().
uint32_t klotski::cases::common_code_to_type_id(uint64_t common_code) {
// Earlier implementation (block counting followed by a table search):
// return cal_type_id(cal_common_block_num(common_code));
return common_code_pro(common_code);
/// Get the type_id of a RawCode: count its blocks with cal_raw_block_num(),
/// then convert the counts with cal_type_id().
uint32_t klotski::cases::raw_code_to_type_id(uint64_t raw_code) {
return cal_type_id(cal_raw_block_num(raw_code));


@ -25,7 +25,37 @@ using klotski::codec::SHORT_CODE_LIMIT;
int main() {
const auto start = clock();
std::cout << std::format("{:09X}", 0x1A9BF0C00) << std::endl;
// std::cout << klotski::cases::common_code_to_type_id(0x1A9BF0C00) << std::endl;
// uint32_t demo = 0b101001110110;
// 010110001001
// 01001000100
// 00 -> 1 | 0 -> 0
// 01 -> 1 | 1 -> 1
// 10 -> 0 | 0 -> 0
// 11 -> 0 | 1 -> 0
// uint32_t ret = ((~demo >> 1) & demo) & 0x55555555;
// std::cout << ret << std::endl;
// std::cout << std::popcount(ret) << std::endl;
uint32_t range = 0xA9BF0C00; // n_01 = 1 / n_10 = 4 / n_11 = 4
// 10 10 10 01 10 11 11 11 00 00 11 00 00 00 00 00
// 0 0 0 1 0 0 0 0 1 1 0 1 1 1 1 1
// 0 0 0 1 0 1 1 1 0 0 1 0 0 0 0 0
// 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
uint32_t k_01 = (~range >> 1) & range & 0x55555555;
uint32_t k_10 = (range >> 1) & ~range & 0x55555555;
uint32_t k_11 = (range >> 1) & range & 0x55555555;
// std::cout << k_01 << std::endl;
std::cout << std::popcount(k_01) << std::endl;
std::cout << std::popcount(k_10) << std::endl;
std::cout << std::popcount(k_11) << std::endl;
// auto kk = GroupUnion::create(123).value();
// std::cout << kk.size() << std::endl;


@ -16,6 +16,8 @@
return ins; \
/// Marks a function with the compiler's `always_inline` attribute.
#define KLSK_INLINE __attribute__((always_inline))
namespace klotski {
/// Get the number of consecutive `0` in the low bits.


@ -19,36 +19,56 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../core/common_code)
# ------------------------------------------------------------------------------------ #
add_executable(test_klotski_cases ${KLOTSKI_TEST_CASES_SRC})
add_executable(test_klotski_cases ${KLSK_TEST_CASES_SRC})
target_link_libraries(test_klotski_cases PRIVATE ${KLSK_TEST_DEPS})
add_test(NAME klotski_cases COMMAND test_klotski_cases)
# ------------------------------------------------------------------------------------ #
add_executable(test_klotski_ffi ${KLOTSKI_TEST_FFI_SRC})
add_executable(test_klotski_ffi ${KLSK_TEST_FFI_SRC})
target_link_libraries(test_klotski_ffi PRIVATE ${KLSK_TEST_DEPS})
add_test(NAME klotski_ffi COMMAND test_klotski_ffi)
# ------------------------------------------------------------------------------------ #
add_executable(test_klotski_codec ${KLOTSKI_TEST_CODEC_SRC})
add_executable(test_klotski_codec ${KLSK_TEST_CODEC_SRC})
target_link_libraries(test_klotski_codec PRIVATE ${KLSK_TEST_DEPS})
add_test(NAME klotski_codec COMMAND test_klotski_codec)
# ------------------------------------------------------------------------------------ #
# Test executable built from KLSK_TEST_CORE_SRC, registered with CTest as `klotski_core`.
add_executable(test_klotski_core ${KLSK_TEST_CORE_SRC})
target_link_libraries(test_klotski_core PRIVATE ${KLSK_TEST_DEPS})
add_test(NAME klotski_core COMMAND test_klotski_core)
# ------------------------------------------------------------------------------------ #
# Test executable built from KLSK_TEST_GROUP_TMP_SRC, registered with CTest as `klotski_group_tmp`.
add_executable(test_klotski_group_tmp ${KLSK_TEST_GROUP_TMP_SRC})
target_link_libraries(test_klotski_group_tmp PRIVATE ${KLSK_TEST_DEPS})
add_test(NAME klotski_group_tmp COMMAND test_klotski_group_tmp)
# ------------------------------------------------------------------------------------ #


@ -0,0 +1,51 @@
#include <gtest/gtest.h>
#include "core/core.h"
#include <unordered_set>
#include "all_cases/all_cases.h"
#include "common_code/common_code.h"
using klotski::core::Core;
using klotski::cases::AllCases;
using klotski::codec::CommonCode;
// mask test
// Converts every case (all 16 heads) to its raw code and calls
// `Core::next_cases` on each one.
// NOTE(review): the loop/lambda bodies and closing braces are not visible in
// this view; presumably each `raw_code` is appended to `raw_codes` and the
// callback stores `ret` into `codes` — confirm.
// NOTE(review): no EXPECT/ASSERT is visible outside the commented-out code.
TEST(core, core) {
std::vector<uint64_t> raw_codes;
// std::unordered_set<uint64_t> codes;
// codes.reserve(klotski::cases::ALL_CASES_NUM_);
for (uint64_t head = 0; head < 16; ++head) {
for (const auto range : AllCases::instance().fetch()[head]) {
auto common_code = CommonCode::unsafe_create(head << 32 | range);
auto raw_code = common_code.to_raw_code().unwrap();
// codes.emplace(raw_code);
// auto core = Core([&codes](uint64_t ret, uint64_t) {
// EXPECT_EQ(codes.count(ret), 1);
// });
std::vector<uint64_t> codes;
auto core = Core([&codes](uint64_t ret, uint64_t) {
for (auto raw_code : raw_codes) {
core.next_cases(raw_code, 0);
// std::cout << codes.size() << std::endl;


@ -0,0 +1,149 @@
#include <common.h>
#include <gtest/gtest.h>
#include <thread>
#include <md5.h>
#include "group/group.h"
#include "all_cases/all_cases.h"
using klotski::range_reverse;
using klotski::cases::AllCases;
using klotski::codec::CommonCode;
using klotski::cases::ALL_CASES_NUM;
/// Number of blocks of each kind in one layout; all counters start at zero.
struct block_num_t {
uint8_t n_1x1 = 0; /// [0, 14]
uint8_t n_1x2 = 0; /// [0, 7]
uint8_t n_2x1 = 0; /// [0, 7]
/// Two block counts are equal when all three counters match.
bool operator==(block_num_t b1, block_num_t b2) {
return (b1.n_1x1 == b2.n_1x1) && (b1.n_1x2 == b2.n_1x2) && (b1.n_2x1 == b2.n_2x1);
/// Expected MD5 of the combined block-count text checked by the `Group.block_num` test.
const char BLOCK_NUM_MD5[] = "46a7b3af6d039cbe2f7eaebdd196c6a2";
/// Reference block counter for a CommonCode, used by the tests.
///
/// The low 32 bits are passed through `range_reverse` and then consumed two
/// bits at a time from the low end until no set bits remain.
///
/// NOTE(review): the case bodies are not visible in this view; presumably each
/// case increments the matching counter of `result` — confirm.
block_num_t common_block_num(const uint64_t common_code) noexcept {
block_num_t result;
auto range = range_reverse(static_cast<uint32_t>(common_code));
for (; range; range >>= 2) {
switch (range & 0b11) {
case 0b01: /// 1x2 block
case 0b10: /// 2x1 block
case 0b11: /// 1x1 block
return result;
/// Reference block counter for a RawCode, used by the tests.
///
/// The code is read as 20 consecutive 3-bit cells, starting from the low bits.
///
/// NOTE(review): the case bodies are not visible in this view; presumably each
/// case increments the matching counter of `result` — confirm.
block_num_t raw_block_num(const uint64_t raw_code) noexcept {
block_num_t result;
auto tmp = raw_code;
for (int addr = 0; addr < 20; ++addr, tmp >>= 3) {
switch (tmp & 0b111) {
case BLOCK_1x1:
case BLOCK_1x2:
case BLOCK_2x1:
return result;
/// Ascending table of block-count flags used by the reference get_type_id();
/// the position of a flag in this table is its type_id. Each flag is
/// `(n_1x2 + n_2x1) << 8 | n_2x1 << 4 | n_1x1`.
const uint16_t TYPE_ID_INDEX[203] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 256,
257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 272, 273, 274, 275,
276, 277, 278, 279, 280, 281, 282, 283, 284, 512, 513, 514, 515, 516, 517, 518,
519, 520, 521, 522, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 544,
545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 768, 769, 770, 771, 772, 773,
774, 775, 776, 784, 785, 786, 787, 788, 789, 790, 791, 792, 800, 801, 802, 803,
804, 805, 806, 807, 808, 816, 817, 818, 819, 820, 821, 822, 823, 824, 1024, 1025,
1026, 1027, 1028, 1029, 1030, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1056, 1057, 1058, 1059,
1060, 1061, 1062, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1088, 1089, 1090, 1091, 1092, 1093,
1094, 1280, 1281, 1282, 1283, 1284, 1296, 1297, 1298, 1299, 1300, 1312, 1313, 1314, 1315, 1316,
1328, 1329, 1330, 1331, 1332, 1344, 1345, 1346, 1347, 1348, 1360, 1361, 1362, 1363, 1364, 1536,
1537, 1538, 1552, 1553, 1554, 1568, 1569, 1570, 1584, 1585, 1586, 1600, 1601, 1602, 1616, 1617,
1618, 1632, 1633, 1634, 1792, 1808, 1824, 1840, 1856, 1872, 1888,
/// Reference conversion from block counts to a type_id, used by the tests.
///
/// The search key packs the counts as:
///   ... 0000 0xxx 0xxx xxxx
///            n_x2x n_2x1 n_1x1
/// where n_x2x = n_1x2 + n_2x1.
///
/// @param block_num block counts of the layout.
/// @return position of the first TYPE_ID_INDEX entry that is not less than the key.
uint32_t get_type_id(block_num_t block_num) noexcept {
    const auto pair_total = block_num.n_1x2 + block_num.n_2x1;
    const auto key = (pair_total << 8) | (block_num.n_2x1 << 4) | block_num.n_1x1;

    const auto *table_begin = TYPE_ID_INDEX;
    const auto *table_end = TYPE_ID_INDEX + klotski::cases::TYPE_ID_LIMIT;
    return std::lower_bound(table_begin, table_end, key) - table_begin;
}
// Checks the block counts of every case in three ways:
//   1. counts derived from the CommonCode equal those derived from the RawCode;
//   2. n_1x2 * 2 + n_2x1 * 2 + n_1x1 never exceeds 14;
//   3. the MD5 of all "<n_1x2 + n_2x1>,<n_1x1>,<n_2x1>\n" lines, concatenated
//      in head order, equals BLOCK_NUM_MD5.
// Each of the 16 heads is processed on its own thread and writes only to its
// own `result[head]` slot.
TEST(Group, block_num) {
std::string result[16];
auto test = [&result](uint64_t head) {
char buffer[13];
result[head].reserve(ALL_CASES_NUM[head]); // pre-allocate string storage
for (const auto range: AllCases::instance().fetch()[head]) {
auto common_code = CommonCode::unsafe_create(head << 32 | range);
auto tmp = common_block_num(common_code.unwrap());
EXPECT_EQ(tmp, raw_block_num(common_code.to_raw_code().unwrap()));
EXPECT_LE(tmp.n_1x2 * 2 + tmp.n_2x1 * 2 + tmp.n_1x1, 14);
sprintf(buffer, "%d,%d,%d\n", tmp.n_1x2 + tmp.n_2x1, tmp.n_1x1, tmp.n_2x1);
result[head] += buffer;
std::thread threads[16];
for (uint64_t head = 0; head < 16; ++head) {
threads[head] = std::thread(test, head); // one verification thread per head
for (auto &t : threads) { t.join(); } // wait until every per-head string is built
std::string block_num_str;
for (auto &&tmp : result) {
block_num_str += tmp; // combine results in head order
auto block_num_md5 = md5::MD5::Hash(block_num_str.c_str(), block_num_str.size());
EXPECT_STREQ(block_num_md5.c_str(), BLOCK_NUM_MD5); // verify md5
// For every case, the library's `common_code_to_type_id` must agree with the
// reference path in this file: common_block_num() followed by get_type_id().
TEST(Group, common_code) {
for (uint64_t head = 0; head < 16; ++head) {
for (const auto range : AllCases::instance().fetch()[head]) {
auto common_code = (head << 32 | range);
EXPECT_EQ(klotski::cases::common_code_to_type_id(common_code), get_type_id(common_block_num(common_code)));
// For every case, the library's `raw_code_to_type_id` must agree with the
// reference path in this file: raw_block_num() followed by get_type_id().
TEST(Group, raw_code) {
for (uint64_t head = 0; head < 16; ++head) {
for (const auto range : AllCases::instance().fetch()[head]) {
auto raw_code = CommonCode::unsafe_create(head << 32 | range).to_raw_code().unwrap();
EXPECT_EQ(klotski::cases::raw_code_to_type_id(raw_code), get_type_id(raw_block_num(raw_code)));