Browse Source

perf: optimize BasicRanges calculation

master
Dnomd343 1 month ago
parent
commit
986bae9f8c
  1. 37
      src/core/all_cases/all_cases.h
  2. 2
      src/core/all_cases/internal/all_cases.cc
  3. 143
      src/core/all_cases/internal/basic_ranges.cc
  4. 11
      src/core/all_cases/internal/basic_ranges.inl
  5. 57
      src/core/all_cases/internal/constant.inl
  6. 13
      src/core/benchmark/group.cc
  7. 43
      src/core/main.cc
  8. 2
      src/core/ranges/ranges.h
  9. 50
      src/core/utils/utility.h
  10. 10
      src/core/utils/worker.inl

37
src/core/all_cases/all_cases.h

@ -37,45 +37,25 @@
#pragma once #pragma once
#include <array>
#include <mutex> #include <mutex>
#include <numeric>
#include "utils/utility.h" #include "utils/utility.h"
#include "ranges/ranges.h" #include "ranges/ranges.h"
#include "internal/constant.inl"
namespace klotski::cases { namespace klotski::cases {
// ------------------------------------------------------------------------------------- // // ------------------------------------------------------------------------------------- //
typedef std::array<Ranges, 16> RangesUnion;
// ------------------------------------------------------------------------------------- //
constexpr auto BASIC_RANGES_NUM = 7311885;
constexpr std::array ALL_CASES_NUM {
2942906, 2260392, 2942906, 0,
2322050, 1876945, 2322050, 0,
2322050, 1876945, 2322050, 0,
2942906, 2260392, 2942906, 0,
};
// TODO: move to short_code namespace (also `numeric` header)
constexpr auto ALL_CASES_NUM_ = std::accumulate(
ALL_CASES_NUM.begin(), ALL_CASES_NUM.end(), 0);
// ------------------------------------------------------------------------------------- //
class BasicRanges { class BasicRanges {
public: public:
/// Execute the build process and ensure thread safety. /// Execute the build process.
void build(); void build();
/// Execute the build process in parallel without blocking. /// Execute the build process without blocking.
void build_async(Executor &&executor, Notifier &&callback); void build_async(Executor &&executor, Notifier &&callback);
/// Get the basic-ranges and make sure the result is available. /// Get basic-ranges and make sure the result is available.
const Ranges& fetch(); const Ranges& fetch();
/// Determine whether the basic-ranges data is available. /// Determine whether the basic-ranges data is available.
@ -88,9 +68,6 @@ private:
/// Get static singleton variable. /// Get static singleton variable.
static Ranges& get_ranges(); static Ranges& get_ranges();
/// Search and sort all possible basic-ranges permutations.
static void build_ranges(Ranges &ranges);
KLSK_INSTANCE(BasicRanges) KLSK_INSTANCE(BasicRanges)
}; };
@ -98,11 +75,11 @@ private:
class AllCases { class AllCases {
public: public:
/// Execute the build process and ensure thread safety. /// Execute the build process.
void build(); void build();
/// Execute the build process in parallel without blocking. /// Execute the build process without blocking.
void build_parallel_async(Executor &&executor, Notifier &&callback); void build_async(Executor &&executor, Notifier &&callback);
/// Get all-cases and make sure the result is available. /// Get all-cases and make sure the result is available.
const RangesUnion& fetch(); const RangesUnion& fetch();

2
src/core/all_cases/internal/all_cases.cc

@ -100,7 +100,7 @@ void AllCases::build() {
available_ = true; available_ = true;
} }
void AllCases::build_parallel_async(Executor &&executor, Notifier &&callback) { void AllCases::build_async(Executor &&executor, Notifier &&callback) {
if (available_) { if (available_) {
callback(); callback();
return; // reduce consumption of mutex return; // reduce consumption of mutex

143
src/core/all_cases/internal/basic_ranges.cc

@ -1,21 +1,23 @@
#include <list> #include <list>
#include <algorithm> #include <algorithm>
#include "ranges/ranges.h" #include "group/group.h"
#include "all_cases/all_cases.h" #include "all_cases/all_cases.h"
using klotski::cases::Ranges; using klotski::cases::Ranges;
using klotski::cases::BasicRanges; using klotski::cases::BasicRanges;
using klotski::cases::TYPE_ID_LIMIT;
typedef Ranges::iterator RangesIter;
typedef std::tuple<int, int, int> RangeType; typedef std::tuple<int, int, int> RangeType;
typedef std::array<RangeType, 203> RangeTypeUnion; typedef std::array<RangeType, TYPE_ID_LIMIT> RangeTypeUnion;
/// Generate all possible basic-ranges permutations. /// Generate all possible basic-ranges permutations.
consteval static RangeTypeUnion range_types() { consteval static RangeTypeUnion range_types() {
RangeTypeUnion data; RangeTypeUnion data;
for (int i = 0, n = 0; n <= 7; ++n) { // 1x2 + 2x1 -> 0 ~ 7 for (int i = 0, n = 0; n <= 7; ++n) { // 1x2 + 2x1 -> 0 ~ 7
for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) { // 2x1 -> 0 ~ n for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) { // 2x1 -> 0 ~ n
if (n == 7 && n_2x1 == 7) { if (n_2x1 == 7) {
break; break;
} }
for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) { // 1x1 -> 0 ~ (14 - 2n) for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) { // 1x1 -> 0 ~ (14 - 2n)
@ -27,8 +29,8 @@ consteval static RangeTypeUnion range_types() {
} }
/// Combine two consecutive sorted arrays into one sorted array. /// Combine two consecutive sorted arrays into one sorted array.
static void inplace_merge(Ranges::iterator begin, Ranges::iterator mid, const Ranges::iterator end) { static void inplace_merge(RangesIter begin, RangesIter mid, const RangesIter end) {
std::vector<uint32_t> tmp = {begin, mid}; // left array backup std::vector<uint32_t> tmp {begin, mid}; // left array backup
for (auto p = tmp.begin();;) { for (auto p = tmp.begin();;) {
if (*p <= *mid) { if (*p <= *mid) {
*(begin++) = *(p++); // stored in original span *(begin++) = *(p++); // stored in original span
@ -44,88 +46,103 @@ static void inplace_merge(Ranges::iterator begin, Ranges::iterator mid, const Ra
} }
} }
void BasicRanges::build_ranges(Ranges &ranges) { void BasicRanges::build() {
ranges.clear(); if (available_) {
ranges.reserve(BASIC_RANGES_NUM); return; // reduce consumption of mutex
}
std::lock_guard guard {building_};
if (available_) {
return; // data is already available
}
std::list flags {ranges.begin()}; // TODO: flags can be constexpr auto &ranges = get_ranges();
ranges.clear();
ranges.reserve(BASIC_RANGES_NUM_);
for (auto [n, n_2x1, n_1x1] : range_types()) { for (auto [n, n_2x1, n_1x1] : range_types()) {
ranges.spawn(n, n_2x1, n_1x1); ranges.spawn(n, n_2x1, n_1x1);
flags.emplace_back(ranges.end()); // mark ordered interval
} }
std::list<RangesIter> points; // mark ordered interval
for (const auto offset : to_offset(BASIC_RANGES_NUM, 0)) {
points.emplace_back(ranges.begin() + offset);
}
points.emplace_back(ranges.end());
do { do {
decltype(flags.begin()) begin = flags.begin(), mid, end; decltype(points)::iterator begin = points.begin(), mid, end;
while (++(mid = begin) != flags.end() && ++(end = mid) != flags.end()) { while (++(mid = begin) != points.end() && ++(end = mid) != points.end()) {
inplace_merge(*begin, *mid, *end); // merge two ordered interval inplace_merge(*begin, *mid, *end); // merge two ordered interval
flags.erase(mid); points.erase(mid);
begin = end; begin = end;
} }
} while (flags.size() > 2); // merge until only one interval remains } while (points.size() > 2); // merge until only one interval remains
}
void do_sort(klotski::Executor &&executor, klotski::Notifier notifier, std::shared_ptr<std::list<Ranges::iterator>> flags) {
klotski::Worker worker {std::move(executor)};
decltype(flags->begin()) begin = flags->begin(), mid, end;
while (++(mid = begin) != flags->end() && ++(end = mid) != flags->end()) {
worker.post([begin = *begin, mid = *mid, end = *end]() {
inplace_merge(begin, mid, end); // merge two ordered interval
});
flags->erase(mid);
begin = end;
}
worker.then([flags, notifier](klotski::Executor &&executor) {
if (flags->size() == 2) {
notifier();
return;
}
do_sort(std::move(executor), notifier, flags);
});
available_ = true;
} }
void BasicRanges::build_async(Executor &&executor, Notifier &&callback) { void BasicRanges::build_async(Executor &&executor, Notifier &&callback) {
if (available_) {
callback();
return; // reduce consumption of mutex
}
building_.lock();
if (available_) {
building_.unlock();
callback();
return; // data is already available
}
// TODO: add mutex protect here auto all_done = [this, callback = std::move(callback)] {
available_ = true;
Worker worker {std::move(executor)}; building_.unlock();
auto cache = std::make_shared<std::array<Ranges, 203>>(); callback();
};
for (uint32_t i = 0; i < 203; ++i) {
Worker worker {executor};
auto cache = std::make_shared<std::array<Ranges, TYPE_ID_LIMIT>>();
for (uint32_t i = 0; i < TYPE_ID_LIMIT; ++i) {
(*cache)[i].reserve(BASIC_RANGES_NUM[i]);
worker.post([cache, i] { worker.post([cache, i] {
auto [n, n_2x1, n_1x1] = range_types()[i]; auto [n, n_2x1, n_1x1] = range_types()[i];
cache->operator[](i).spawn(n, n_2x1, n_1x1); (*cache)[i].spawn(n, n_2x1, n_1x1);
}); });
} }
// auto all_done = std::make_shared<Notifier>(std::move(callback)); worker.then([cache, all_done = std::move(all_done), executor = std::move(executor)] mutable {
worker.then([cache, this, callback](Executor &&executor) {
auto &ranges = get_ranges(); auto &ranges = get_ranges();
ranges.clear(); ranges.clear();
ranges.reserve(BASIC_RANGES_NUM); ranges.reserve(BASIC_RANGES_NUM_);
for (auto &&tmp : *cache) {
const auto flags = std::make_shared<std::list<Ranges::iterator>>();
flags->emplace_back(ranges.end());
for (auto &tmp : *cache) {
ranges.insert(ranges.end(), tmp.begin(), tmp.end()); ranges.insert(ranges.end(), tmp.begin(), tmp.end());
flags->emplace_back(ranges.end()); // mark ordered interval
} }
do_sort(std::move(executor), callback, flags); auto points = std::make_shared<std::list<RangesIter>>(); // mark ordered interval
for (const auto offset : to_offset(BASIC_RANGES_NUM, 0)) {
available_ = true; points->emplace_back(ranges.begin() + offset);
}
points->emplace_back(ranges.end());
auto inner_sort = [points, all_done, executor = std::move(executor)](auto &&self) -> void {
Worker sorter {executor};
auto begin = points->begin();
decltype(begin) mid, end;
while (++(mid = begin) != points->end() && ++(end = mid) != points->end()) {
sorter.post([begin = *begin, mid = *mid, end = *end] {
inplace_merge(begin, mid, end); // merge two ordered interval
});
points->erase(mid);
begin = end;
}
sorter.then([self, points, all_done] {
if (points->size() == 2) {
all_done();
return;
}
self(self); // next sort round
});
};
inner_sort(inner_sort); // TODO: using `this auto &&self` in new version
}); });
} }

11
src/core/all_cases/internal/basic_ranges.inl

@ -14,17 +14,6 @@ inline const Ranges& BasicRanges::fetch() {
return get_ranges(); return get_ranges();
} }
inline void BasicRanges::build() {
if (available_) {
return; // reduce consumption of mutex
}
std::lock_guard guard {building_};
if (!available_) {
build_ranges(get_ranges());
available_ = true;
}
}
inline bool BasicRanges::is_available() const { inline bool BasicRanges::is_available() const {
return available_; // no mutex required in one-way state return available_; // no mutex required in one-way state
} }

57
src/core/all_cases/internal/constant.inl

@ -0,0 +1,57 @@
#pragma once
#include <array>
namespace klotski::cases {
// Precomputed element counts shared by the all-cases and basic-ranges builders.
// NOTE: `array_sum` is not declared in this file. all_cases.h includes
// "utils/utility.h" before this file, and that header is where `array_sum`
// is defined, so this file depends on that include order.
// ------------------------------------------------------------------------------------- //
/// Table of 16 case counts; every fourth entry is 0.
// NOTE(review): presumably indexed the same way as the 16-entry `RangesUnion`
// (one count per slot) — confirm against the AllCases builder.
constexpr auto ALL_CASES_NUM = std::to_array({
2942906, 2260392, 2942906, 0,
2322050, 1876945, 2322050, 0,
2322050, 1876945, 2322050, 0,
2942906, 2260392, 2942906, 0,
});
// Guards against an entry being added or dropped from the table by accident.
static_assert(ALL_CASES_NUM.size() == 16);
/// Sum of all entries of `ALL_CASES_NUM` (29334498); `array_sum` returns `int`.
constexpr auto ALL_CASES_NUM_ = array_sum(ALL_CASES_NUM);
// ------------------------------------------------------------------------------------- //
/// Table of 203 basic-ranges counts, one per range type.
// basic_ranges.cc reserves `BASIC_RANGES_NUM[i]` elements before spawning the
// i-th entry of `range_types()`, and feeds this table to `to_offset` to locate
// the start of each ordered interval before merging, so the order of the
// entries must follow the order in which `range_types()` generates its tuples.
constexpr auto BASIC_RANGES_NUM = std::to_array({
1 , 16 , 120 , 560 , 1820 , 4368 , 8008 , 11440 ,
12870 , 11440 , 8008 , 4368 , 1820 , 560 , 120 , 15 ,
210 , 1365 , 5460 , 15015 , 30030, 45045, 51480 , 45045 ,
30030 , 15015 , 5460 , 1365 , 15 , 210 , 1365 , 5460 ,
15015 , 30030 , 45045 , 51480 , 45045, 30030, 15015 , 5460 ,
1365 , 91 , 1092 , 6006 , 20020, 45045, 72072 , 84084 ,
72072 , 45045 , 20020 , 6006 , 182 , 2184 , 12012 , 40040 ,
90090 , 144144, 168168, 144144, 90090, 40040, 12012 , 91 ,
1092 , 6006 , 20020 , 45045 , 72072, 84084, 72072 , 45045 ,
20020 , 6006 , 286 , 2860 , 12870, 34320, 60060 , 72072 ,
60060 , 34320 , 12870 , 858 , 8580 , 38610, 102960, 180180,
216216, 180180, 102960, 38610 , 858 , 8580 , 38610 , 102960,
180180, 216216, 180180, 102960, 38610, 286 , 2860 , 12870 ,
34320 , 60060 , 72072 , 60060 , 34320, 12870, 495 , 3960 ,
13860 , 27720 , 34650 , 27720 , 13860, 1980 , 15840 , 55440 ,
110880, 138600, 110880, 55440 , 2970 , 23760, 83160 , 166320,
207900, 166320, 83160 , 1980 , 15840, 55440, 110880, 138600,
110880, 55440 , 495 , 3960 , 13860, 27720, 34650 , 27720 ,
13860 , 462 , 2772 , 6930 , 9240 , 6930 , 2310 , 13860 ,
34650 , 46200 , 34650 , 4620 , 27720, 69300, 92400 , 69300 ,
4620 , 27720 , 69300 , 92400 , 69300, 2310 , 13860 , 34650 ,
46200 , 34650 , 462 , 2772 , 6930 , 9240 , 6930 , 210 ,
840 , 1260 , 1260 , 5040 , 7560 , 3150 , 12600 , 18900 ,
4200 , 16800 , 25200 , 3150 , 12600, 18900, 1260 , 5040 ,
7560 , 210 , 840 , 1260 , 36 , 252 , 756 , 1260 ,
1260 , 756 , 252 ,
});
// Guards the table length.
// NOTE(review): 203 presumably equals `TYPE_ID_LIMIT`, which sizes the
// per-type arrays in basic_ranges.cc — confirm.
static_assert(BASIC_RANGES_NUM.size() == 203);
/// Sum of all entries of `BASIC_RANGES_NUM`; used as the reserve size for the
/// complete basic-ranges list.
// NOTE(review): all_cases.h previously hard-coded 7311885 for this total; the
// computed sum is expected to match — TODO confirm.
constexpr auto BASIC_RANGES_NUM_ = array_sum(BASIC_RANGES_NUM);
// ------------------------------------------------------------------------------------- //
} // namespace klotski::cases

13
src/core/benchmark/group.cc

@ -172,14 +172,17 @@ static void OriginBasicRanges(benchmark::State &state) {
for (auto _ : state) { for (auto _ : state) {
auto &kk = klotski::cases::BasicRanges::instance(); auto &kk = klotski::cases::BasicRanges::instance();
kk.build_ranges(kk.get_ranges()); // kk.build_ranges(kk.get_ranges());
kk.available_ = false;
// kk.build();
// kk.build_async([](auto func) {func();}, [](){}); // kk.build_async([](auto func) {func();}, [](){});
// kk.build_async([&pool](auto func) { kk.build_async([&pool](auto func) {
// pool.submit_task(func); pool.submit_task(func);
// }, [] {}); }, [] {});
// pool.wait(); pool.wait();
} }
} }

43
src/core/main.cc

@ -38,45 +38,32 @@ int main() {
BS::thread_pool pool {}; BS::thread_pool pool {};
// auto demo = [](auto &&self, int val) {
// std::cout << "val = " << val << std::endl;
// if (val == 0) {
// return;
// }
// self(self, val - 1);
// };
//
// demo(demo, 5);
// constexpr std::array<int, 5> kk {1, 2, 3, 4, 5};
// auto ret = klotski::to_offset(kk, 0);
// std::cout << std::format("{}", ret) << std::endl;
// klotski::cases::BasicRanges::instance().build(); // klotski::cases::BasicRanges::instance().build();
klotski::cases::BasicRanges::instance().build_async([&pool](auto &&func) { klotski::cases::BasicRanges::instance().build_async([&pool](auto &&func) {
pool.submit_task(func); pool.submit_task(func);
}, [] { }, [] {
std::cout << "all done" << std::endl; // std::cout << "all done" << std::endl;
}); });
// klotski::cases::BasicRanges::instance().build();
//
// klotski::cases::AllCases::instance().build_parallel_async([&pool](auto func) { // klotski::cases::AllCases::instance().build_parallel_async([&pool](auto func) {
// pool.submit_task(func); // pool.submit_task(func);
// }, [] {}); // }, [] {});
// std::cout << "start call" << std::endl;
// klotski::Notifier kk {};
// kk();
// std::cout << "end call" << std::endl;
// {
// klotski::Worker worker {[&pool](auto &&func) { pool.submit_task(func); }};
//
// for (int i = 1; i < 3; ++i) {
// worker.post([i] {
// std::cout << std::format("task {} begin\n", i);
// std::this_thread::sleep_for(std::chrono::seconds(i));
// std::cout << std::format("task {} complete\n", i);
// });
// }
//
// worker.then([](klotski::Executor &&executor){
// std::cout << "all tasks done\n";
// });
//
// std::cout << "worker start release\n";
// }
//
// std::cout << "block exit\n";
pool.wait(); pool.wait();
// std::cout << BasicRanges::instance().fetch().size() << std::endl; // std::cout << BasicRanges::instance().fetch().size() << std::endl;

2
src/core/ranges/ranges.h

@ -25,6 +25,8 @@ void derive_demo(const std::vector<uint32_t> &range, const std::vector<uint32_t>
void derive_demo_pro(const BidiRanges &bidi_range, std::vector<uint32_t> &output, int head); void derive_demo_pro(const BidiRanges &bidi_range, std::vector<uint32_t> &output, int head);
typedef std::array<Ranges, 16> RangesUnion;
// TODO: add RangesUnion here // TODO: add RangesUnion here
// TODO: -> spawn from Ranges / export std::vector<CommonCode> // TODO: -> spawn from Ranges / export std::vector<CommonCode>

50
src/core/utils/utility.h

@ -2,6 +2,7 @@
#include <bit> #include <bit>
#include <list> #include <list>
#include <numeric>
#include <functional> #include <functional>
/// Mark target class as a singleton. /// Mark target class as a singleton.
@ -26,23 +27,33 @@
namespace klotski { namespace klotski {
/// Get the number of consecutive `0` in the low bits. template <typename T>
// inline int low_zero_num(const uint32_t bin) { concept Addable = requires(T a, T b) { a + b; };
// return __builtin_ctzl(bin);
// template <Addable T, size_t N>
// // TODO: using (bin ^ (bin - 1)) when non-builtin consteval int array_sum(const std::array<T, N> &arr) {
// return std::accumulate(arr.begin(), arr.end(), 0);
// // WARN: be aware of serious performance issues }
// // return __builtin_popcount(~(bin ^ -bin)) - 1;
// } template <Addable T, size_t N>
consteval std::array<T, N> to_offset(const std::array<T, N> &arr, T base) {
/// Get the number of consecutive `0` in the low bits.
// inline int low_zero_num(const uint64_t bin) { static_assert(N > 0);
// return __builtin_ctzll(bin);
// std::array<T, N> offset;
// // WARN: be aware of serious performance issues
// // return __builtin_popcount(~(bin ^ -bin)) - 1; T val = base;
// }
offset[0] = 0;
for (int i = 0; i < N - 1; ++i) {
val += arr[i];
offset[i + 1] = val;
}
return offset;
}
/// Flips the input u32 every two bits in low-high symmetry. /// Flips the input u32 every two bits in low-high symmetry.
inline uint32_t range_reverse(uint32_t bin) { inline uint32_t range_reverse(uint32_t bin) {
@ -69,16 +80,15 @@ typedef std::function<void(std::function<void()> &&)> Executor;
class Worker final { class Worker final {
public: public:
using Task = std::function<void()>; using Task = std::function<void()>;
using After = std::function<void(Executor &&)>;
/// Construction based on executor. /// Construction based on executor.
explicit Worker(Executor &&executor); explicit Worker(Executor executor);
/// Post new task into the queue. /// Post new task into the queue.
void post(Task &&task); void post(Task &&task);
/// Setting up callback entry. /// Setting up callback entry.
void then(After &&after); void then(Notifier &&after);
/// Tasks will be triggered at destruction. /// Tasks will be triggered at destruction.
~Worker(); ~Worker();

10
src/core/utils/worker.inl

@ -4,16 +4,16 @@
namespace klotski { namespace klotski {
inline Worker::Worker(Executor &&executor) inline Worker::Worker(Executor executor)
: after_([] {}), executor_(executor) {} : after_([] {}), executor_(std::move(executor)) {}
inline void Worker::post(Task &&task) { inline void Worker::post(Task &&task) {
tasks_.emplace_back(std::move(task)); tasks_.emplace_back(std::move(task));
} }
inline void Worker::then(After &&after) { inline void Worker::then(Notifier &&after) {
after_ = [after = std::move(after), executor = executor_]() mutable { after_ = [after = std::move(after)]() {
after(std::move(executor)); after();
}; };
} }

Loading…
Cancel
Save