From 265eb230f4f2ebcfda8fbf1a3d1aa63667593010 Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sat, 25 May 2024 22:58:21 +0800 Subject: [PATCH] perf: update AllCases module --- src/core/CMakeLists.txt | 2 +- src/core/all_cases/all_cases.h | 16 +-- src/core/all_cases/internal/all_cases.cc | 131 +++++++++++++---------- src/core/all_cases/internal/derive.cc | 113 ------------------- src/core/benchmark/group.cc | 16 ++- src/core/main.cc | 36 +++---- src/core_ffi/c_ffi/all_cases.cc | 14 +-- src/core_test/cases/all_cases.cc | 26 ++--- 8 files changed, 124 insertions(+), 230 deletions(-) delete mode 100644 src/core/all_cases/internal/derive.cc diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 14a0f90..dbca9e2 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -6,7 +6,7 @@ set(CMAKE_CXX_STANDARD 23) set(KLOTSKI_CORE_SRC all_cases/internal/basic_ranges.cc all_cases/internal/all_cases.cc - all_cases/internal/derive.cc +# all_cases/internal/derive.cc common_code/internal/common_code.cc common_code/internal/serialize.cc diff --git a/src/core/all_cases/all_cases.h b/src/core/all_cases/all_cases.h index 9cae8c7..7486071 100644 --- a/src/core/all_cases/all_cases.h +++ b/src/core/all_cases/all_cases.h @@ -106,7 +106,7 @@ public: /// TODO: remove this interface /// Execute the build process with parallel support and ensure thread safety. - void build_parallel(Executor &&executor); + // void build_parallel(Executor &&executor); /// Execute the build process in parallel without blocking. void build_parallel_async(Executor &&executor, Notifier &&callback); @@ -127,20 +127,6 @@ private: KLSK_INSTANCE(AllCases) }; -inline const std::vector& get_reversed() { - static auto value = []() { - std::vector ranges {BasicRanges::instance().fetch()}; - for (auto &x : ranges) { - x = range_reverse(x); - } - return ranges; - }(); - return value; -} - -void global_derive(const std::vector &range, std::vector &output, int head); -void global_derive_pro(const std::vector &range, const std::vector &reversed, std::vector &output, int head); - // ------------------------------------------------------------------------------------- // } // namespace klotski::cases diff --git a/src/core/all_cases/internal/all_cases.cc b/src/core/all_cases/internal/all_cases.cc index 819b762..d9e056e 100644 --- a/src/core/all_cases/internal/all_cases.cc +++ b/src/core/all_cases/internal/all_cases.cc @@ -1,14 +1,17 @@ #include +#include "utils/utility.h" +#include "ranges/ranges.h" #include "all_cases/all_cases.h" +using klotski::range_reverse; using klotski::cases::Ranges; using klotski::cases::AllCases; using klotski::cases::BasicRanges; using klotski::cases::ALL_CASES_NUM; /// Generate all possible klotski heads. -consteval static std::array heads() { +static consteval std::array heads() { std::array heads {}; for (int i = 0, head = 0; head < 15; ++head) { if (head % 4 != 3) { @@ -18,72 +21,81 @@ consteval static std::array heads() { return heads; } +/// Check whether the combination of head and range is valid. +static int check_range(const int head, uint32_t range) { + uint32_t flags = 0b110011 << head; // fill 2x2 block + for (int addr = 0, offset = 1; range; range >>= 2, ++offset) { // traverse every 2-bit + const auto num = std::countr_one(flags); + addr += num; // next unfilled block + flags >>= num; + switch (range & 0b11) { + case 0b00: // space + case 0b11: // 1x1 block + flags |= 0b1; + continue; + case 0b01: // 1x2 block + if (flags & 0b10 || addr % 4 == 3) { // invalid case + return offset; // broken offset + } + flags |= 0b11; + continue; + case 0b10: // 2x1 block + if (flags & 0b10000 || addr > 15) { // invalid case + return offset; // broken offset + } + flags |= 0b10001; + } + } + return 0; // pass check +} + /// Build all valid ranges of the specified head. -static void build_cases(const int head, Ranges &release) { +static void build_cases(const std::vector &ranges, + const std::vector &reversed, Ranges &release, const int head) { release.clear(); release.reserve(ALL_CASES_NUM[head]); - // klotski::cases::global_derive(BasicRanges::instance().fetch(), release, head); - klotski::cases::global_derive_pro(BasicRanges::instance().fetch(), klotski::cases::get_reversed(), release, head); - - // BasicRanges::instance().fetch().derive(head, release); -} - -void AllCases::build() { - - // TODO: lock here - - // klotski::cases::get_reversed(); - - std::vector reversed {BasicRanges::instance().fetch()}; - for (auto &x : reversed) { - x = range_reverse(x); - } - - // std::vector reversed; - // std::ranges::transform(BasicRanges::instance().fetch(), std::back_inserter(reversed), [](uint32_t x) { return range_reverse(x); }); - - // auto &reversed = get_reversed(); - - for (auto head : heads()) { - // build_cases(head, get_cases()[head]); - - auto &release = get_cases()[head]; - - release.clear(); - release.reserve(ALL_CASES_NUM[head]); + for (uint32_t index = 0; index < reversed.size(); ++index) { + if (const auto offset = check_range(head, reversed[index])) { // invalid case + if (offset > 14) { + continue; + } - // klotski::cases::global_derive(BasicRanges::instance().fetch(), release, head); - // klotski::cases::global_derive_pro(BasicRanges::instance().fetch(), reversed, release, head); - klotski::cases::global_derive_pro(reversed, BasicRanges::instance().fetch(), release, head); + // !! <- broken + // ( xx xx xx ) xx xx xx ... [range] + // +1 00 00 00 ... (delta) + const int tmp = (16 - offset) * 2; + uint32_t min_next = ((ranges[index] >> tmp) + 1) << tmp; // next possible range + if (offset > 5) { // located next range by min_next + while (ranges[++index] < min_next) {} + } else { + index = std::lower_bound(ranges.begin() + index, ranges.end(), min_next) - ranges.begin(); + } + --index; + continue; + } + release.emplace_back(range_reverse(reversed[index])); // release valid case } - available_ = true; - - // build_parallel([](auto &&func) { - // func(); - // }); } -void AllCases::build_parallel(Executor &&executor) { +void AllCases::build() { if (available_) { return; // reduce consumption of mutex } - std::lock_guard guard(building_); + + std::lock_guard guard {building_}; if (available_) { return; // data is already available } - std::vector> futures; - for (auto head : heads()) { - auto promise = std::make_shared>(); - futures.emplace_back(promise->get_future()); - executor([head, promise = std::move(promise)]() { - build_cases(head, get_cases()[head]); - promise->set_value(); // subtask completed notification - }); + + const auto &ranges = BasicRanges::instance().fetch(); + std::vector reversed {ranges}; + for (auto &x : reversed) { + x = range_reverse(x); } - for (auto &x : futures) { - x.get(); // wait until all subtasks completed + for (const auto head : heads()) { + build_cases(ranges, reversed, get_cases()[head], head); } available_ = true; } @@ -93,17 +105,24 @@ void AllCases::build_parallel_async(Executor &&executor, Notifier &&callback) { callback(); return; // reduce consumption of mutex } + building_.lock(); if (available_) { building_.unlock(); callback(); return; // data is already available } - auto counter = std::make_shared>(0); - auto all_done = std::make_shared(std::move(callback)); - for (auto head : heads()) { - executor([this, head, counter, all_done]() { - build_cases(head, get_cases()[head]); + + const auto counter = std::make_shared>(0); + const auto all_done = std::make_shared(std::move(callback)); + const auto reversed = std::make_shared>(BasicRanges::instance().fetch()); + for (auto &x : *reversed) { + x = range_reverse(x); + } + + for (const auto head : heads()) { + executor([=, this] { + build_cases(BasicRanges::instance().fetch(), *reversed, get_cases()[head], head); if (counter->fetch_add(1) == heads().size() - 1) { // all tasks done available_ = true; building_.unlock(); // release building mutex diff --git a/src/core/all_cases/internal/derive.cc b/src/core/all_cases/internal/derive.cc deleted file mode 100644 index a1435f7..0000000 --- a/src/core/all_cases/internal/derive.cc +++ /dev/null @@ -1,113 +0,0 @@ -#include "utils/utility.h" -#include "ranges/ranges.h" - -#include "all_cases/all_cases.h" - -using klotski::cases::Ranges; - -/// Check whether the combination of head and range is valid. -static int check_range(const int head, uint32_t range) { - uint32_t flags = 0b110011 << head; // fill 2x2 block - for (int addr = 0, offset = 1; range; range >>= 2, ++offset) { // traverse every 2-bit - const auto num = std::countr_one(flags); - addr += num; // next unfilled block - flags >>= num; - switch (range & 0b11) { - case 0b00: // space - case 0b11: // 1x1 block - flags |= 0b1; - continue; - case 0b01: // 1x2 block - if (flags & 0b10 || addr % 4 == 3) { // invalid case - return offset; // broken offset - } - flags |= 0b11; - continue; - case 0b10: // 2x1 block - if (flags & 0b10000 || addr > 15) { // invalid case - return offset; // broken offset - } - flags |= 0b10001; - } - } - return 0; // pass check -} - -void klotski::cases::global_derive(const std::vector &range, std::vector &output, int head) { - - for (uint32_t index = 0; index < range.size(); ++index) { - if (const auto offset = check_range(head, range[index])) { // invalid case - - // if (offset > 14) { - // continue; - // } - - int left_offset = (16 - offset) * 2; - uint32_t min_next = ((range_reverse(range[index]) >> left_offset) + 1) << left_offset; - - // if (offset > 5) { - while (range_reverse(range[++index]) < min_next) {} // located next range - --index; - // } else { - // auto begin = reversed.begin() + index; - // auto kk = std::lower_bound(begin, reversed.end(), min_next) - begin; - // index += kk - 1; - // } - - // uint32_t tmp = 1U << (32 - offset * 2); // distance to next possible range - /// !! <- broken - /// ( xx xx xx ) xx xx xx ... [reversed range] - /// +1 00 00 00 ... (delta) - // tmp += range_reverse(range[index]) & ~(tmp - 1); - - // TODO: overflow here in some type_id - // TODO: -> tmp > range[-1] - // TODO: maybe using binary search here - - // while (range_reverse(range[++index]) < tmp) {} // located next range - // --index; - continue; - } - output.emplace_back(range_reverse(range[index])); // release valid case - } -} - -void klotski::cases::global_derive_pro(const std::vector &range, const std::vector &reversed, std::vector &output, int head) { - - // uint32_t reversed_max = reversed.back(); - for (uint32_t index = 0; index < range.size(); ++index) { - if (const auto offset = check_range(head, range[index])) { // invalid case - - if (offset > 14) { - continue; - } - - int left_offset = (16 - offset) * 2; - uint32_t min_next = ((reversed[index] >> left_offset) + 1) << left_offset; - - if (offset > 5) { - while (reversed[++index] < min_next) {} // located next range - --index; - } else { - auto begin = reversed.begin() + index; - auto kk = std::lower_bound(begin, reversed.end(), min_next) - begin; - index += kk - 1; - } - - // uint32_t tmp = 1U << (32 - offset * 2); // distance to next possible range - /// !! <- broken - /// ( xx xx xx ) xx xx xx ... [reversed range] - /// +1 00 00 00 ... (delta) - // tmp += range_reverse(range[index]) & ~(tmp - 1); - - // TODO: overflow here in some type_id - // TODO: -> tmp > range[-1] - // TODO: maybe using binary search here - - // while (range_reverse(range[++index]) < tmp) {} // located next range - // --index; - continue; - } - output.emplace_back(range_reverse(range[index])); // release valid case - } -} diff --git a/src/core/benchmark/group.cc b/src/core/benchmark/group.cc index 903455c..7d24096 100644 --- a/src/core/benchmark/group.cc +++ b/src/core/benchmark/group.cc @@ -7,6 +7,7 @@ #include +#include "../../../third_party/thread-pool/include/BS_thread_pool.hpp" #include "all_cases/all_cases.h" #undef private @@ -175,12 +176,25 @@ static void OriginBasicRanges(benchmark::State &state) { static void OriginAllCases(benchmark::State &state) { klotski::cases::BasicRanges::instance().build(); - klotski::cases::get_reversed(); + // klotski::cases::get_reversed(); + + // BS::thread_pool pool {4}; for (auto _ : state) { auto &pp = klotski::cases::AllCases::instance(); pp.available_ = false; pp.build(); + + // pp.build_parallel_async([](auto func) {func();}, []() { + // // std::cout << "hello" << std::endl; + // }); + + // pp.build_parallel_async([&pool](auto func) { + // pool.submit_task(func); + // }, [] {}); + // + // pool.wait(); + } } diff --git a/src/core/main.cc b/src/core/main.cc index 9481370..6b6c4cf 100644 --- a/src/core/main.cc +++ b/src/core/main.cc @@ -12,6 +12,8 @@ #include "short_code/short_code.h" #include "common_code/common_code.h" +#include "../../third_party/thread-pool/include/BS_thread_pool.hpp" + using klotski::core::Core; using klotski::cases::AllCases; @@ -25,35 +27,21 @@ using klotski::cases::GroupUnion; using klotski::codec::SHORT_CODE_LIMIT; int main() { - const auto start = clock(); - - auto &basic_ranges = klotski::cases::BasicRanges::instance().fetch(); - - klotski::cases::Ranges flip {basic_ranges}; - for (auto &x : flip) { - x = klotski::range_reverse(x); - } + // const auto start = clock(); - klotski::cases::Ranges results; - results.reserve(klotski::cases::ALL_CASES_NUM_); + const auto start = std::chrono::system_clock::now(); - klotski::cases::derive_demo(basic_ranges, flip, results, 0); - klotski::cases::derive_demo(basic_ranges, flip, results, 1); - klotski::cases::derive_demo(basic_ranges, flip, results, 2); + BS::thread_pool pool {}; - klotski::cases::derive_demo(basic_ranges, flip, results, 4); - klotski::cases::derive_demo(basic_ranges, flip, results, 5); - klotski::cases::derive_demo(basic_ranges, flip, results, 6); + klotski::cases::BasicRanges::instance().build(); - klotski::cases::derive_demo(basic_ranges, flip, results, 8); - klotski::cases::derive_demo(basic_ranges, flip, results, 9); - klotski::cases::derive_demo(basic_ranges, flip, results, 10); + klotski::cases::AllCases::instance().build_parallel_async([&pool](auto func) { + pool.submit_task(func); + }, [] {}); - klotski::cases::derive_demo(basic_ranges, flip, results, 12); - klotski::cases::derive_demo(basic_ranges, flip, results, 13); - klotski::cases::derive_demo(basic_ranges, flip, results, 14); + pool.wait(); - // std::cout << results.size() << " vs " << klotski::cases::ALL_CASES_NUM_ << std::endl; + std::cerr << std::chrono::system_clock::now() - start << std::endl; // auto raw_code = RawCode::from_common_code(0x1A9BF0C00)->unwrap(); // auto ret = klotski::cases::group_extend_from_seed(raw_code); @@ -92,7 +80,7 @@ int main() { // std::cout << "----" << std::endl; // } - std::cerr << ((clock() - start) * 1000 / CLOCKS_PER_SEC) << "ms" << std::endl; + // std::cerr << ((clock() - start) * 1000 / CLOCKS_PER_SEC) << "ms" << std::endl; return 0; } diff --git a/src/core_ffi/c_ffi/all_cases.cc b/src/core_ffi/c_ffi/all_cases.cc index 946f75a..7ab6a59 100644 --- a/src/core_ffi/c_ffi/all_cases.cc +++ b/src/core_ffi/c_ffi/all_cases.cc @@ -40,13 +40,13 @@ void all_cases_build_async(const executor_t executor, const notifier_t callback) } void all_cases_build_parallel(executor_t executor) { - AllCases::instance().build_parallel([executor](Runner &&runner) { - const auto func = [](void *arg) { - (*static_cast(arg))(); - delete static_cast(arg); - }; - executor(func, new Runner {std::move(runner)}); - }); + // AllCases::instance().build_parallel([executor](Runner &&runner) { + // const auto func = [](void *arg) { + // (*static_cast(arg))(); + // delete static_cast(arg); + // }; + // executor(func, new Runner {std::move(runner)}); + // }); } void all_cases_build_parallel_async(executor_t executor, notifier_t callback) { diff --git a/src/core_test/cases/all_cases.cc b/src/core_test/cases/all_cases.cc index da950f8..6190346 100644 --- a/src/core_test/cases/all_cases.cc +++ b/src/core_test/cases/all_cases.cc @@ -89,23 +89,23 @@ TEST_FF(AllCases, all_cases_race) { } TEST_FF(AllCases, all_cases_parallel) { - AllCases::instance().build_parallel(executor_.Entry()); - EXPECT_TRUE(Available()); - Verify(); + // AllCases::instance().build_parallel(executor_.Entry()); + // EXPECT_TRUE(Available()); + // Verify(); - AllCases::instance().build_parallel(executor_.Entry()); - EXPECT_TRUE(Available()); - Verify(); + // AllCases::instance().build_parallel(executor_.Entry()); + // EXPECT_TRUE(Available()); + // Verify(); } TEST_FF(AllCases, all_cases_parallel_race) { - racer_.Begin([this] { - AllCases::instance().build_parallel(executor_.Entry()); - }); - EXPECT_FALSE(Available()); - racer_.Join(); - EXPECT_TRUE(Available()); - Verify(); + // racer_.Begin([this] { + // AllCases::instance().build_parallel(executor_.Entry()); + // }); + // EXPECT_FALSE(Available()); + // racer_.Join(); + // EXPECT_TRUE(Available()); + // Verify(); } TEST_FF(AllCases, all_cases_async) {