From 8686e25bf2ea6fa6be24691a59d406774df55dc7 Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sat, 25 May 2024 16:55:37 +0800 Subject: [PATCH] perf: update Range module --- src/core/CMakeLists.txt | 2 +- src/core/all_cases/all_cases.h | 5 +- src/core/all_cases/internal/all_cases.cc | 20 +++--- src/core/all_cases/internal/basic_ranges.cc | 12 ++-- src/core/benchmark/group.cc | 39 +++++++++++- src/core/main.cc | 8 --- src/core/ranges/internal/derive.cc | 48 ++++++++++++++ src/core/ranges/internal/head.cc | 57 ----------------- src/core/ranges/internal/ranges.cc | 60 +++++++----------- src/core/ranges/ranges.h | 70 ++------------------- src/core/utils/utility.h | 2 + src/core_test/cases/all_cases.cc | 2 +- src/core_test/cases/basic_ranges.cc | 6 +- 13 files changed, 140 insertions(+), 191 deletions(-) create mode 100644 src/core/ranges/internal/derive.cc delete mode 100644 src/core/ranges/internal/head.cc diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 9678047..0cad65c 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -25,7 +25,7 @@ set(KLOTSKI_CORE_SRC group/internal/group.cc ranges/internal/ranges.cc - ranges/internal/head.cc + ranges/internal/derive.cc ) add_library(klotski_core STATIC ${KLOTSKI_CORE_SRC}) diff --git a/src/core/all_cases/all_cases.h b/src/core/all_cases/all_cases.h index 249b029..4fa9693 100644 --- a/src/core/all_cases/all_cases.h +++ b/src/core/all_cases/all_cases.h @@ -58,7 +58,7 @@ typedef std::function&&)> Executor; // ------------------------------------------------------------------------------------- // -constexpr auto BASIC_RANGES_NUM = 7311921; +constexpr auto BASIC_RANGES_NUM = 7311885; constexpr std::array ALL_CASES_NUM { 2942906, 2260392, 2942906, 0, @@ -124,9 +124,6 @@ private: /// Get static singleton variable. static RangesUnion& get_cases(); - /// Build all valid ranges of the specified head. - static void build_cases(int head, Ranges &release); - KLSK_INSTANCE(AllCases) }; diff --git a/src/core/all_cases/internal/all_cases.cc b/src/core/all_cases/internal/all_cases.cc index 3d84acc..1da6950 100644 --- a/src/core/all_cases/internal/all_cases.cc +++ b/src/core/all_cases/internal/all_cases.cc @@ -2,11 +2,14 @@ #include "all_cases/all_cases.h" +using klotski::cases::Ranges; using klotski::cases::AllCases; +using klotski::cases::BasicRanges; +using klotski::cases::ALL_CASES_NUM; -/// Calculate all possible klotski heads. -consteval static std::array case_heads() { - std::array heads = {}; +/// Generate all possible klotski heads. +consteval static std::array heads() { + std::array heads {}; for (int i = 0, head = 0; head < 15; ++head) { if (head % 4 != 3) { heads[i++] = head; @@ -15,10 +18,11 @@ consteval static std::array case_heads() { return heads; } -void AllCases::build_cases(const int head, Ranges &release) { +/// Build all valid ranges of the specified head. +static void build_cases(const int head, Ranges &release) { release.clear(); release.reserve(ALL_CASES_NUM[head]); - BasicRanges::instance().fetch().with_head(head, release); + BasicRanges::instance().fetch().derive(head, release); } void AllCases::build() { @@ -36,7 +40,7 @@ void AllCases::build_parallel(Executor &&executor) { return; // data is already available } std::vector> futures; - for (auto head : case_heads()) { + for (auto head : heads()) { auto promise = std::make_shared>(); futures.emplace_back(promise->get_future()); executor([head, promise = std::move(promise)]() { @@ -63,10 +67,10 @@ void AllCases::build_parallel_async(Executor &&executor, Notifier &&callback) { } auto counter = std::make_shared>(0); auto all_done = std::make_shared(std::move(callback)); - for (auto head : case_heads()) { + for (auto head : heads()) { executor([this, head, counter, all_done]() { build_cases(head, get_cases()[head]); - if (counter->fetch_add(1) == case_heads().size() - 1) { // all tasks done + if (counter->fetch_add(1) == heads().size() - 1) { // all tasks done available_ = true; building_.unlock(); // release building mutex all_done->operator()(); // trigger callback diff --git a/src/core/all_cases/internal/basic_ranges.cc b/src/core/all_cases/internal/basic_ranges.cc index 39996f8..ff718d0 100644 --- a/src/core/all_cases/internal/basic_ranges.cc +++ b/src/core/all_cases/internal/basic_ranges.cc @@ -8,14 +8,16 @@ using klotski::cases::Ranges; using klotski::cases::BasicRanges; typedef std::tuple RangeType; -typedef std::array RangeTypeUnion; +typedef std::array RangeTypeUnion; /// Generate all possible basic-ranges permutations. consteval static RangeTypeUnion range_types() { RangeTypeUnion data; for (int i = 0, n = 0; n <= 7; ++n) { // 1x2 + 2x1 -> 0 ~ 7 for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) { // 2x1 -> 0 ~ n - // TODO: skip n == 7 && n_2x1 == 7 + if (n == 7 && n_2x1 == 7) { + break; + } for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) { // 1x1 -> 0 ~ (14 - 2n) data[i++] = {n, n_2x1, n_1x1}; } @@ -48,7 +50,7 @@ void BasicRanges::build_ranges(Ranges &ranges) { std::list flags { ranges.begin() }; for (auto [n, n_2x1, n_1x1] : range_types()) { - ranges.spawn_more(n, n_2x1, n_1x1); + ranges.spawn(n, n_2x1, n_1x1); flags.emplace_back(ranges.end()); // mark ordered interval } @@ -61,5 +63,7 @@ void BasicRanges::build_ranges(Ranges &ranges) { } } while (flags.size() > 2); // merge until only one interval remains - ranges.reverse(); // flip every 2-bit + for (auto &x : ranges) { + x = range_reverse(x); // flip every 2-bit + } } diff --git a/src/core/benchmark/group.cc b/src/core/benchmark/group.cc index 4cc48aa..d2c6ded 100644 --- a/src/core/benchmark/group.cc +++ b/src/core/benchmark/group.cc @@ -16,7 +16,7 @@ using klotski::cases::AllCases; static std::vector all_common_codes() { std::vector codes; for (uint64_t head = 0; head < 16; ++head) { - for (const auto range : AllCases::instance().fetch()[head].ranges_) { + for (const auto range : AllCases::instance().fetch()[head]) { codes.emplace_back(head << 32 | range); } } @@ -159,7 +159,7 @@ static void SpawnRanges(benchmark::State &state) { kk.reserve(7311921); for (auto [n, n_2x1, n_1x1] : nums) { - kk.spawn_more(n, n_2x1, n_1x1); + kk.spawn(n, n_2x1, n_1x1); } } @@ -172,6 +172,35 @@ static void OriginBasicRanges(benchmark::State &state) { } } +static void OriginAllCases(benchmark::State &state) { + + klotski::cases::BasicRanges::instance().build(); + + for (auto _ : state) { + auto &pp = klotski::cases::AllCases::instance(); + pp.available_ = false; + pp.build(); + } + +} + +static void RangesDerive(benchmark::State &state) { + + auto &basic_ranges = klotski::cases::BasicRanges::instance().fetch(); + + klotski::cases::Ranges results; + results.reserve(klotski::cases::ALL_CASES_NUM[5]); + + for (auto _ : state) { + + results.clear(); + basic_ranges.derive(5, results); + + } + + // std::cout << results.size() << " vs " << klotski::cases::ALL_CASES_NUM[5] << std::endl; +} + // BENCHMARK(CommonCodeToTypeId)->Arg(8)->Arg(64)->Arg(256); // BENCHMARK(RawCodeToTypeId)->Arg(8)->Arg(64)->Arg(256); @@ -181,6 +210,10 @@ static void OriginBasicRanges(benchmark::State &state) { // BENCHMARK(SpawnRanges)->Unit(benchmark::kMillisecond); -BENCHMARK(OriginBasicRanges)->Unit(benchmark::kMillisecond); +// BENCHMARK(OriginBasicRanges)->Unit(benchmark::kMillisecond); + +// BENCHMARK(OriginAllCases)->Unit(benchmark::kMillisecond); + +BENCHMARK(RangesDerive)->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/src/core/main.cc b/src/core/main.cc index 010eeef..82cd3a4 100644 --- a/src/core/main.cc +++ b/src/core/main.cc @@ -27,14 +27,6 @@ using klotski::codec::SHORT_CODE_LIMIT; int main() { const auto start = clock(); - // auto kk = klotski::cases::RangesDemo(); - // - // for (auto x : kk) { - // std::cout << x << std::endl; - // } - - // klotski::cases::spawn_ranges(2, 1, 4, 4); - // auto raw_code = RawCode::from_common_code(0x1A9BF0C00)->unwrap(); // auto ret = klotski::cases::group_extend_from_seed(raw_code); // diff --git a/src/core/ranges/internal/derive.cc b/src/core/ranges/internal/derive.cc new file mode 100644 index 0000000..6c54ea6 --- /dev/null +++ b/src/core/ranges/internal/derive.cc @@ -0,0 +1,48 @@ +#include "utils/utility.h" +#include "ranges/ranges.h" + +using klotski::cases::Ranges; + +/// Check whether the combination of head and range is valid. +static int check_range(const int head, uint32_t range) { + uint32_t flags = 0b110011 << head; // fill 2x2 block + for (int addr = 0, offset = 1; range; range >>= 2, ++offset) { // traverse every 2-bit + const auto num = std::countr_one(flags); + addr += num; // next unfilled block + flags >>= num; + switch (range & 0b11) { + case 0b00: // space + case 0b11: // 1x1 block + flags |= 0b1; + continue; + case 0b01: // 1x2 block + if (flags & 0b10 || addr % 4 == 3) { // invalid case + return offset; // broken offset + } + flags |= 0b11; + continue; + case 0b10: // 2x1 block + if (flags & 0b10000 || addr > 15) { // invalid case + return offset; // broken offset + } + flags |= 0b10001; + } + } + return 0; // pass check +} + +void Ranges::derive(const int head, Ranges &output) const { + for (uint32_t index = 0; index < size(); ++index) { + if (const auto offset = check_range(head, (*this)[index])) { // invalid case + uint32_t tmp = 1U << (32 - offset * 2); // distance to next possible range + /// !! <- broken + /// ( xx xx xx ) xx xx xx ... [reversed range] + /// +1 00 00 00 ... (delta) + tmp += range_reverse((*this)[index]) & ~(tmp - 1); + while (range_reverse((*this)[++index]) < tmp) {} // located next range + --index; + continue; + } + output.emplace_back(range_reverse((*this)[index])); // release valid case + } +} diff --git a/src/core/ranges/internal/head.cc b/src/core/ranges/internal/head.cc deleted file mode 100644 index 6560faa..0000000 --- a/src/core/ranges/internal/head.cc +++ /dev/null @@ -1,57 +0,0 @@ -#include "utils/utility.h" -#include "ranges/ranges.h" - -/// Check whether the combination of head and range is valid. -static int check_range(const int head, uint32_t range) noexcept { - constexpr uint32_t M_1x1 = 0b00000001; - constexpr uint32_t M_1x2 = 0b00000011; - constexpr uint32_t M_2x1 = 0b00010001; - constexpr uint32_t M_2x2 = 0b00110011; - - uint32_t flags = M_2x2 << head; // fill 2x2 block - for (int addr = 0, offset = 1; range; range >>= 2, ++offset) { // traverse every 2-bit - const auto num = klotski::low_zero_num(~flags); - addr += num; // next unfilled block - flags >>= num; - switch (range & 0b11) { - case 0b00: // space - case 0b11: // 1x1 block - flags |= M_1x1; - continue; - case 0b10: // 2x1 block - if ((flags >> 4) & 0b1 || addr > 15) { // invalid case - return offset; // broken offset - } - flags |= M_2x1; - continue; - case 0b01: // 1x2 block - if ((flags >> 1) & 0b1 || (addr & 0b11) == 0b11) { // invalid case - return offset; // broken offset - } - flags |= M_1x2; - continue; - } - } - return 0; // pass check -} - -void klotski::cases::Ranges::with_head(const int head, Ranges &release) const { - // release.clear(); - // release.reserve(ALL_CASES_NUM[head]); - // auto &basic_ranges = BasicRanges::instance().fetch(); - - for (uint32_t index = 0; index < ranges_.size(); ++index) { - auto offset = check_range(head, ranges_[index]); - if (offset) { // invalid case - auto tmp = (uint32_t)0b1 << (32 - offset * 2); // distance to next possible range - /// !! <- broken - /// ( xx xx xx ) xx xx xx ... [reversed range] - /// +1 00 00 00 ... (delta) - tmp += klotski::range_reverse(ranges_[index]) & ~(tmp - 1); - while (klotski::range_reverse(ranges_[++index]) < tmp); // located next range - --index; - continue; - } - release.ranges_.emplace_back(klotski::range_reverse(ranges_[index])); // release valid case - } -} diff --git a/src/core/ranges/internal/ranges.cc b/src/core/ranges/internal/ranges.cc index c362b68..b95e4ed 100644 --- a/src/core/ranges/internal/ranges.cc +++ b/src/core/ranges/internal/ranges.cc @@ -1,54 +1,40 @@ -#include #include -#include "ranges/ranges.h" #include "utils/utility.h" +#include "ranges/ranges.h" -template -static void demo(std::vector &ranges, int n_10, int n_11) { - - constexpr auto num = 16 - N; - constexpr auto offset = (16 - num) << 1; // offset of low bits +using klotski::cases::Ranges; - int n_00 = 16 - N * 2 - n_11; +template +static void build_ranges(std::vector &ranges, int n_10, int n_11) { int n_01 = N - n_10; + int n_00 = 16 - N * 2 - n_11; - std::array series {}; - - auto kk = std::fill_n(series.begin() + n_00, n_01, 0b01); - auto pp = std::fill_n(kk, n_10, 0b10); - std::fill_n(pp, n_11, 0b11); - - // std::vector ranges; + std::array series {}; + std::fill_n(series.begin() + n_00, n_01, 0b01); + std::fill_n(series.begin() + n_00 + n_01, n_10, 0b10); + std::fill_n(series.begin() + n_00 + n_01 + n_10, n_11, 0b11); do { uint32_t range = 0; - for (const auto x : series) // store every 2-bit + for (const auto x : series) { // store every 2-bit (range <<= 2) |= x; - ranges.emplace_back(range << offset); + } + ranges.emplace_back(range << (N * 2)); } while (std::ranges::next_permutation(series).found); - - // return ranges; } -// void klotski::cases::spawn_ranges(std::vector &ranges, int n, int n_2x1, int n_1x1) { -// -// -// } - -void klotski::cases::Ranges::spawn_more(int n, int n_2x1, int n_1x1) { - // spawn_ranges(ranges_, n, n_2x1, n_1x1); - +void Ranges::spawn(const int n, const int n_2x1, const int n_1x1) { + KLSK_ASSUME(n >= 0 && n_2x1 >= 0 && n_1x1 >= 0); + KLSK_ASSUME(n <= 7 && n_2x1 <= n && n_1x1 + n * 2 <= 14); switch (n) { - case 0: return demo<0>(ranges_, n_2x1, n_1x1); - case 1: return demo<1>(ranges_, n_2x1, n_1x1); - case 2: return demo<2>(ranges_, n_2x1, n_1x1); - case 3: return demo<3>(ranges_, n_2x1, n_1x1); - case 4: return demo<4>(ranges_, n_2x1, n_1x1); - case 5: return demo<5>(ranges_, n_2x1, n_1x1); - case 6: return demo<6>(ranges_, n_2x1, n_1x1); - case 7: return demo<7>(ranges_, n_2x1, n_1x1); - default: return; + case 0: return build_ranges<0>(*this, n_2x1, n_1x1); + case 1: return build_ranges<1>(*this, n_2x1, n_1x1); + case 2: return build_ranges<2>(*this, n_2x1, n_1x1); + case 3: return build_ranges<3>(*this, n_2x1, n_1x1); + case 4: return build_ranges<4>(*this, n_2x1, n_1x1); + case 5: return build_ranges<5>(*this, n_2x1, n_1x1); + case 6: return build_ranges<6>(*this, n_2x1, n_1x1); + case 7: return build_ranges<7>(*this, n_2x1, n_1x1); } - } diff --git a/src/core/ranges/ranges.h b/src/core/ranges/ranges.h index a3a9d39..1ec3ba2 100644 --- a/src/core/ranges/ranges.h +++ b/src/core/ranges/ranges.h @@ -3,75 +3,15 @@ #include #include -#include "utils/utility.h" - namespace klotski::cases { -// void spawn_ranges(std::vector &ranges, int n, int n_2x1, int n_1x1); - -// std::vector basic_ranges(); - -// TODO: should we inherit on `std::vector` ? -class Ranges { +class Ranges : public std::vector { public: - void spawn_more(int n, int n_2x1, int n_1x1); - - using iterator = std::vector::iterator; - using size_type = std::vector::size_type; - using const_iterator = std::vector::const_iterator; - - using value_type = std::vector::value_type; - - using reference = std::vector::reference; - - using const_reference = std::vector::const_reference; - - iterator begin() { - return ranges_.begin(); - } - - iterator end() { - return ranges_.end(); - } - - [[nodiscard]] const_iterator begin() const { - return ranges_.begin(); - } - - [[nodiscard]] const_iterator end() const { - return ranges_.end(); - } - - void clear() { - ranges_.clear(); - } - - [[nodiscard]] size_type size() const { - return ranges_.size(); - } - - void reserve(const size_type cap) { - ranges_.reserve(cap); - } - - void with_head(int head, Ranges &release) const; - - void reverse() { - for (auto &x : ranges_) { - x = range_reverse(x); // flip every 2-bit - } - } - - const_reference operator[](const size_type n) const { - return ranges_[n]; - } - - [[nodiscard]] const value_type* data() const { - return ranges_.data(); - } + /// Spawn klotski-ranges that match the specified block numbers. + void spawn(int n, int n_2x1, int n_1x1); -// private: - std::vector ranges_ {}; + /// Derive the legal klotski-ranges with specified head. + void derive(int head, Ranges &output) const; }; } // namespace klotski::cases diff --git a/src/core/utils/utility.h b/src/core/utils/utility.h index edb2cb9..3db8b73 100644 --- a/src/core/utils/utility.h +++ b/src/core/utils/utility.h @@ -18,6 +18,8 @@ #define KLSK_INLINE __attribute__((always_inline)) +#define KLSK_ASSUME(expr) __builtin_assume(expr) + namespace klotski { /// Get the number of consecutive `0` in the low bits. diff --git a/src/core_test/cases/all_cases.cc b/src/core_test/cases/all_cases.cc index 206237c..da950f8 100644 --- a/src/core_test/cases/all_cases.cc +++ b/src/core_test/cases/all_cases.cc @@ -39,7 +39,7 @@ protected: EXPECT_EQ(all_cases_num, ALL_CASES_NUM_); // verify all cases global size for (int head = 0; head < 16; ++head) { - EXPECT_EQ(hash::xxh3(all_cases[head].ranges_), ALL_CASES_XXH3[head]); // verify all cases checksum + EXPECT_EQ(hash::xxh3(all_cases[head]), ALL_CASES_XXH3[head]); // verify all cases checksum } } }; diff --git a/src/core_test/cases/basic_ranges.cc b/src/core_test/cases/basic_ranges.cc index 526367c..b85aad9 100644 --- a/src/core_test/cases/basic_ranges.cc +++ b/src/core_test/cases/basic_ranges.cc @@ -1,7 +1,7 @@ #include "hash.h" #include "helper.h" -static constexpr uint64_t BASIC_RANGES_XXH3 = 0x82b040060044e336; +static constexpr uint64_t BASIC_RANGES_XXH3 = 0x2ced674494fe904d; class BasicRangesTest : public testing::Test, public Concurrent { protected: @@ -24,12 +24,12 @@ protected: static void Verify() { const auto &basic_ranges = BasicRanges::instance().fetch(); EXPECT_EQ(basic_ranges.size(), BASIC_RANGES_NUM); // verify basic ranges size - EXPECT_EQ(hash::xxh3(basic_ranges.ranges_), BASIC_RANGES_XXH3); // verify basic ranges checksum + EXPECT_EQ(hash::xxh3(basic_ranges), BASIC_RANGES_XXH3); // verify basic ranges checksum } }; TEST_FF(BasicRanges, constant) { - EXPECT_EQ(BASIC_RANGES_NUM, 7311921); + EXPECT_EQ(BASIC_RANGES_NUM, 7311885); } TEST_FF(BasicRanges, basic_ranges) {