From 4814b3973bf1ce6b389760d0efe54e831009283b Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sun, 5 Jan 2025 17:43:58 +0800 Subject: [PATCH] perf: space search algorithm --- src/core/mover/internal/s2_mover.cc | 283 ++++++++++++++-------------- src/core/mover/s2_mover.h | 12 +- 2 files changed, 144 insertions(+), 151 deletions(-) diff --git a/src/core/mover/internal/s2_mover.cc b/src/core/mover/internal/s2_mover.cc index 7f7e577..e6075fb 100644 --- a/src/core/mover/internal/s2_mover.cc +++ b/src/core/mover/internal/s2_mover.cc @@ -46,10 +46,43 @@ using klotski::codec::RawCode; SET_2x2_(UNSET_2x2_(code, addr_old), addr_new) template -void S2Mover::two_space_a(const uint64_t code) const { - // ---------------- case up ---------------- +void S2Mover::move_single(const uint64_t code) const { + if (N >= 4) { // case up + if (CAPTURE(code, N - 4) == BLOCK_1x1) { + release_(MOVE_1x1(code, N - 4, N)); + } else if (N >= 8 && CAPTURE(code, N - 8) == BLOCK_2x1) { + release_(MOVE_2x1(code, N - 8, N - 4)); + } + } + + if (N < 16) { // case down + if (const uint8_t block = CAPTURE(code, N + 4); block == BLOCK_1x1) { + release_(MOVE_1x1(code, N + 4, N)); + } else if (N < 12 && block == BLOCK_2x1) { + release_(MOVE_2x1(code, N + 4, N)); + } + } + + if (N % 4 >= 1) { // case left + if (CAPTURE(code, N - 1) == BLOCK_1x1) { + release_(MOVE_1x1(code, N - 1, N)); + } else if (N % 4 >= 2 && CAPTURE(code, N - 2) == BLOCK_1x2) { + release_(MOVE_1x2(code, N - 2, N - 1)); + } + } - do { + if (N % 4 < 3) { // case right + if (const uint8_t block = CAPTURE(code, N + 1); block == BLOCK_1x1) { + release_(MOVE_1x1(code, N + 1, N)); + } else if (N % 4 < 2 && block == BLOCK_1x2) { + release_(MOVE_1x2(code, N + 1, N)); + } + } +} + +template +void S2Mover::move_double_h(const uint64_t code) const { + do { // case up if (N >= 4) { if (const uint8_t block = CAPTURE(code, N - 4); block == BLOCK_1x1) { // left part release_(MOVE_1x1(code, N - 4, N)); @@ -58,12 +91,10 @@ void S2Mover::two_space_a(const uint64_t code) const { release_(MOVE_1x2(code, N - 4, N)); break; } else if (N >= 8 && block == BLOCK_fill) { - const uint8_t up_a = CAPTURE(code, N - 8); - if (up_a == BLOCK_2x2) { + if (const uint8_t block_ = CAPTURE(code, N - 8); block_ == BLOCK_2x2) { release_(MOVE_2x2(code, N - 8, N - 4)); break; - } - if (up_a == BLOCK_2x1) { + } else if (block_ == BLOCK_2x1) { release_(MOVE_2x1(code, N - 8, N - 4)); } } @@ -71,16 +102,14 @@ void S2Mover::two_space_a(const uint64_t code) const { release_(MOVE_1x1(code, N - 3, N + 1)); release_(MOVE_1x1(code, N - 3, N)); } else if (N >= 8 && block == BLOCK_fill && CAPTURE(code, N - 7) == BLOCK_2x1) { - release_(MOVE_2x1(code, N - 7, N - 3)); // TODO: benchmark of check `BLOCK_fill` + release_(MOVE_2x1(code, N - 7, N - 3)); } } } while (false); - // ---------------- case down ---------------- - - do { + do { // case down if (N < 16) { - if (const uint8_t block = CAPTURE(code, N + 4); block == BLOCK_1x1) { + if (const uint8_t block = CAPTURE(code, N + 4); block == BLOCK_1x1) { // left part release_(MOVE_1x1(code, N + 4, N)); release_(MOVE_1x1(code, N + 4, N + 1)); } else if (N < 12 && block == BLOCK_2x1) { @@ -92,7 +121,7 @@ void S2Mover::two_space_a(const uint64_t code) const { release_(MOVE_2x2(code, N + 4, N)); break; } - if (const uint8_t block = CAPTURE(code, N + 5); block == BLOCK_1x1) { + if (const uint8_t block = CAPTURE(code, N + 5); block == BLOCK_1x1) { // right part release_(MOVE_1x1(code, N + 5, N + 1)); release_(MOVE_1x1(code, N + 5, N)); } else if (N < 12 && block == BLOCK_2x1) { @@ -101,9 +130,7 @@ void S2Mover::two_space_a(const uint64_t code) const { } } while (false); - // ---------------- case left ---------------- - - if (N % 4 >= 1) { + if (N % 4 >= 1) { // case left if (CAPTURE(code, N - 1) == BLOCK_1x1) { release_(MOVE_1x1(code, N - 1, N)); release_(MOVE_1x1(code, N - 1, N + 1)); @@ -113,11 +140,9 @@ void S2Mover::two_space_a(const uint64_t code) const { } } - // ---------------- case right ---------------- - - if (N % 4 < 2) { + if (N % 4 < 2) { // case right if (const uint8_t block = CAPTURE(code, N + 2); block == BLOCK_1x1) { - release_(MOVE_1x1(code, N + 2, N + 1)); // TODO: benchmark of CSE pass + release_(MOVE_1x1(code, N + 2, N + 1)); release_(MOVE_1x1(code, N + 2, N)); } else if (N % 4 == 0 && block == BLOCK_1x2) { release_(MOVE_1x2(code, N + 2, N + 1)); @@ -127,10 +152,8 @@ void S2Mover::two_space_a(const uint64_t code) const { } template -void S2Mover::two_space_b(const uint64_t code) const { - // ---------------- case up ---------------- - - if (N >= 4) { +void S2Mover::move_double_v(const uint64_t code) const { + if (N >= 4) { // case up if (CAPTURE(code, N - 4) == BLOCK_1x1) { release_(MOVE_1x1(code, N - 4, N)); release_(MOVE_1x1(code, N - 4, N + 4)); @@ -140,9 +163,7 @@ void S2Mover::two_space_b(const uint64_t code) const { } } - // ---------------- case down ---------------- - - if (N < 12) { + if (N < 12) { // case down if (CAPTURE(code, N + 8) == BLOCK_1x1) { release_(MOVE_1x1(code, N + 8, N + 4)); release_(MOVE_1x1(code, N + 8, N)); @@ -152,27 +173,23 @@ void S2Mover::two_space_b(const uint64_t code) const { } } - // ---------------- case left ---------------- - - do { + do { // case left if (N % 4 != 0) { - if (const uint8_t block = CAPTURE(code, N - 1); block == BLOCK_1x1) { + if (const uint8_t block = CAPTURE(code, N - 1); block == BLOCK_1x1) { // up part release_(MOVE_1x1(code, N - 1, N)); release_(MOVE_1x1(code, N - 1, N + 4)); } else if (block == BLOCK_2x1) { release_(MOVE_2x1(code, N - 1, N)); break; } else if (N % 4 >= 2 && block == BLOCK_fill) { - const uint8_t left_d = CAPTURE(code, N - 2); - if (left_d == BLOCK_2x2) { + if (const uint8_t block_ = CAPTURE(code, N - 2); block_ == BLOCK_2x2) { release_(MOVE_2x2(code, N - 2, N - 1)); break; - } - if (left_d == BLOCK_1x2) { + } else if (block_ == BLOCK_1x2) { release_(MOVE_1x2(code, N - 2, N - 1)); } } - if (const uint8_t block = CAPTURE(code, N + 3); block == BLOCK_1x1) { + if (const uint8_t block = CAPTURE(code, N + 3); block == BLOCK_1x1) { // down part release_(MOVE_1x1(code, N + 3, N + 4)); release_(MOVE_1x1(code, N + 3, N)); } else if (N % 4 >= 2 && block == BLOCK_fill && CAPTURE(code, N + 2) == BLOCK_1x2) { @@ -181,11 +198,9 @@ void S2Mover::two_space_b(const uint64_t code) const { } } while (false); - // ---------------- case right ---------------- - - do { + do { // case right if (N % 4 < 3) { - if (const uint8_t block = CAPTURE(code, N + 1); block == BLOCK_1x1) { + if (const uint8_t block = CAPTURE(code, N + 1); block == BLOCK_1x1) { // up part release_(MOVE_1x1(code, N + 1, N)); release_(MOVE_1x1(code, N + 1, N + 4)); } else if (N % 4 < 2 && block == BLOCK_1x2) { @@ -197,7 +212,7 @@ void S2Mover::two_space_b(const uint64_t code) const { release_(MOVE_2x2(code, N + 1, N)); break; } - if (const uint8_t block = CAPTURE(code, N + 5); block == BLOCK_1x1) { + if (const uint8_t block = CAPTURE(code, N + 5); block == BLOCK_1x1) { // down part release_(MOVE_1x1(code, N + 5, N + 4)); release_(MOVE_1x1(code, N + 5, N)); } else if (N % 4 < 2 && block == BLOCK_1x2) { @@ -207,129 +222,107 @@ void S2Mover::two_space_b(const uint64_t code) const { } while (false); } -template -void S2Mover::one_space(const uint64_t code) const { - if (N >= 4) { // case up - if (CAPTURE(code, N - 4) == BLOCK_1x1) { - release_(MOVE_1x1(code, N - 4, N)); - } else if (N >= 8 && CAPTURE(code, N - 8) == BLOCK_2x1) { - release_(MOVE_2x1(code, N - 8, N - 4)); - } - } - - if (N < 16) { // case down - if (const uint8_t block = CAPTURE(code, N + 4); block == BLOCK_1x1) { - release_(MOVE_1x1(code, N + 4, N)); - } else if (N < 12 && block == BLOCK_2x1) { - release_(MOVE_2x1(code, N + 4, N)); - } - } - - if (N % 4 >= 1) { // case left - if (CAPTURE(code, N - 1) == BLOCK_1x1) { - release_(MOVE_1x1(code, N - 1, N)); - } else if (N % 4 >= 2 && CAPTURE(code, N - 2) == BLOCK_1x2) { - release_(MOVE_1x2(code, N - 2, N - 1)); - } - } - - if (N % 4 < 3) { // case right - if (const uint8_t block = CAPTURE(code, N + 1); block == BLOCK_1x1) { - release_(MOVE_1x1(code, N + 1, N)); - } else if (N % 4 < 2 && block == BLOCK_1x2) { - release_(MOVE_1x2(code, N + 1, N)); - } - } -} - -void S2Mover::two_space_a_(uint64_t code, int offset) const { +void S2Mover::move_double_h(uint64_t code, int offset) const { switch (offset) { - case 0: two_space_a<0>(code); break; - case 1: two_space_a<1>(code); break; - case 2: two_space_a<2>(code); break; - case 4: two_space_a<4>(code); break; - case 5: two_space_a<5>(code); break; - case 6: two_space_a<6>(code); break; - case 8: two_space_a<8>(code); break; - case 9: two_space_a<9>(code); break; - case 10: two_space_a<10>(code); break; - case 12: two_space_a<12>(code); break; - case 13: two_space_a<13>(code); break; - case 14: two_space_a<14>(code); break; - case 16: two_space_a<16>(code); break; - case 17: two_space_a<17>(code); break; - case 18: two_space_a<18>(code); break; + case 0: move_double_h<0>(code); break; + case 1: move_double_h<1>(code); break; + case 2: move_double_h<2>(code); break; + case 4: move_double_h<4>(code); break; + case 5: move_double_h<5>(code); break; + case 6: move_double_h<6>(code); break; + case 8: move_double_h<8>(code); break; + case 9: move_double_h<9>(code); break; + case 10: move_double_h<10>(code); break; + case 12: move_double_h<12>(code); break; + case 13: move_double_h<13>(code); break; + case 14: move_double_h<14>(code); break; + case 16: move_double_h<16>(code); break; + case 17: move_double_h<17>(code); break; + case 18: move_double_h<18>(code); break; default: std::unreachable(); } } -void S2Mover::two_space_b_(uint64_t code, int offset) const { +void S2Mover::move_double_v(uint64_t code, int offset) const { switch (offset) { - case 0: two_space_b<0>(code); break; - case 1: two_space_b<1>(code); break; - case 2: two_space_b<2>(code); break; - case 3: two_space_b<3>(code); break; - case 4: two_space_b<4>(code); break; - case 5: two_space_b<5>(code); break; - case 6: two_space_b<6>(code); break; - case 7: two_space_b<7>(code); break; - case 8: two_space_b<8>(code); break; - case 9: two_space_b<9>(code); break; - case 10: two_space_b<10>(code); break; - case 11: two_space_b<11>(code); break; - case 12: two_space_b<12>(code); break; - case 13: two_space_b<13>(code); break; - case 14: two_space_b<14>(code); break; - case 15: two_space_b<15>(code); break; + case 0: move_double_v<0>(code); break; + case 1: move_double_v<1>(code); break; + case 2: move_double_v<2>(code); break; + case 3: move_double_v<3>(code); break; + case 4: move_double_v<4>(code); break; + case 5: move_double_v<5>(code); break; + case 6: move_double_v<6>(code); break; + case 7: move_double_v<7>(code); break; + case 8: move_double_v<8>(code); break; + case 9: move_double_v<9>(code); break; + case 10: move_double_v<10>(code); break; + case 11: move_double_v<11>(code); break; + case 12: move_double_v<12>(code); break; + case 13: move_double_v<13>(code); break; + case 14: move_double_v<14>(code); break; + case 15: move_double_v<15>(code); break; default: std::unreachable(); } } -void S2Mover::one_space_(uint64_t code, int offset) const { +void S2Mover::move_single(uint64_t code, int offset) const { switch (offset) { - case 0: one_space<0>(code); break; - case 1: one_space<1>(code); break; - case 2: one_space<2>(code); break; - case 3: one_space<3>(code); break; - case 4: one_space<4>(code); break; - case 5: one_space<5>(code); break; - case 6: one_space<6>(code); break; - case 7: one_space<7>(code); break; - case 8: one_space<8>(code); break; - case 9: one_space<9>(code); break; - case 10: one_space<10>(code); break; - case 11: one_space<11>(code); break; - case 12: one_space<12>(code); break; - case 13: one_space<13>(code); break; - case 14: one_space<14>(code); break; - case 15: one_space<15>(code); break; - case 16: one_space<16>(code); break; - case 17: one_space<17>(code); break; - case 18: one_space<18>(code); break; - case 19: one_space<19>(code); break; + case 0: move_single<0>(code); break; + case 1: move_single<1>(code); break; + case 2: move_single<2>(code); break; + case 3: move_single<3>(code); break; + case 4: move_single<4>(code); break; + case 5: move_single<5>(code); break; + case 6: move_single<6>(code); break; + case 7: move_single<7>(code); break; + case 8: move_single<8>(code); break; + case 9: move_single<9>(code); break; + case 10: move_single<10>(code); break; + case 11: move_single<11>(code); break; + case 12: move_single<12>(code); break; + case 13: move_single<13>(code); break; + case 14: move_single<14>(code); break; + case 15: move_single<15>(code); break; + case 16: move_single<16>(code); break; + case 17: move_single<17>(code); break; + case 18: move_single<18>(code); break; + case 19: move_single<19>(code); break; default: std::unreachable(); } } void S2Mover::next_cases(const uint64_t code) { - int space_1 = -1; - int space_2 = -1; - for (int addr = 0; addr < 20; ++addr) { - if (((code >> (addr * 3)) & 0b111) == 0) { - if (space_1 == -1) { - space_1 = addr; - continue; - } - space_2 = addr; - } - } + + uint64_t tmp = (code | (code >> 1) | (code >> 2)) | ~0x0249249249249249; + + // constexpr auto kk = std::to_array({ + // 0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, + // 4, 0, 0, 5, 0, 0, 6, 0, 0, 7, 0, 0, + // 8, 0, 0, 9, 0, 0, 10, 0, 0, 11, 0, 0, + // 12, 0, 0, 13, 0, 0, 14, 0, 0, 15, 0, 0, + // 16, 0, 0, 17, 0, 0, 18, 0, 0, 19, 0, 0, + // }); + + // int val_1 = std::countr_one(tmp); + // int val_2 = 63 - std::countl_one(tmp); + + // KLSK_ASSUME(val_1 >= 0 && val_1 <= 57 && val_1 % 3 == 0); + // KLSK_ASSUME(val_2 >= 0 && val_2 <= 57 && val_2 % 3 == 0); + + // int space_1 = kk[val_1]; + // int space_2 = kk[val_2]; + // int space_1 = val_1 / 3; + // int space_2 = val_2 / 3; + + int space_1 = std::countr_one(tmp) / 3; + int space_2 = (63 - std::countl_one(tmp)) / 3; if (space_1 + 1 == space_2 && space_1 % 4 != 3) { - two_space_a_(code, space_1); + move_double_h(code, space_1); } else if (space_1 + 4 == space_2) { - two_space_b_(code, space_1); + move_double_v(code, space_1); } else { - one_space_(code, space_1); - one_space_(code, space_2); + move_single(code, space_1); + move_single(code, space_2); } } diff --git a/src/core/mover/s2_mover.h b/src/core/mover/s2_mover.h index 96f084a..bc027ac 100644 --- a/src/core/mover/s2_mover.h +++ b/src/core/mover/s2_mover.h @@ -16,17 +16,17 @@ private: release_t release_; template - void one_space(uint64_t code) const; + void move_single(uint64_t code) const; template - void two_space_a(uint64_t code) const; + void move_double_h(uint64_t code) const; template - void two_space_b(uint64_t code) const; + void move_double_v(uint64_t code) const; - void one_space_(uint64_t code, int offset) const; - void two_space_a_(uint64_t code, int offset) const; - void two_space_b_(uint64_t code, int offset) const; + void move_single(uint64_t code, int offset) const; + void move_double_h(uint64_t code, int offset) const; + void move_double_v(uint64_t code, int offset) const; }; } // namespace klotski::mover