Browse Source

perf: optimize BasicRanges calculation

master
Dnomd343 6 months ago
parent
commit
986bae9f8c
  1. 37
      src/core/all_cases/all_cases.h
  2. 2
      src/core/all_cases/internal/all_cases.cc
  3. 143
      src/core/all_cases/internal/basic_ranges.cc
  4. 11
      src/core/all_cases/internal/basic_ranges.inl
  5. 57
      src/core/all_cases/internal/constant.inl
  6. 13
      src/core/benchmark/group.cc
  7. 43
      src/core/main.cc
  8. 2
      src/core/ranges/ranges.h
  9. 50
      src/core/utils/utility.h
  10. 10
      src/core/utils/worker.inl

37
src/core/all_cases/all_cases.h

@ -37,45 +37,25 @@
#pragma once
#include <array>
#include <mutex>
#include <numeric>
#include "utils/utility.h"
#include "ranges/ranges.h"
#include "internal/constant.inl"
namespace klotski::cases {
// ------------------------------------------------------------------------------------- //
typedef std::array<Ranges, 16> RangesUnion;
// ------------------------------------------------------------------------------------- //
constexpr auto BASIC_RANGES_NUM = 7311885;
constexpr std::array ALL_CASES_NUM {
2942906, 2260392, 2942906, 0,
2322050, 1876945, 2322050, 0,
2322050, 1876945, 2322050, 0,
2942906, 2260392, 2942906, 0,
};
// TODO: move to short_code namespace (also `numeric` header)
constexpr auto ALL_CASES_NUM_ = std::accumulate(
ALL_CASES_NUM.begin(), ALL_CASES_NUM.end(), 0);
// ------------------------------------------------------------------------------------- //
class BasicRanges {
public:
/// Execute the build process and ensure thread safety.
/// Execute the build process.
void build();
/// Execute the build process in parallel without blocking.
/// Execute the build process without blocking.
void build_async(Executor &&executor, Notifier &&callback);
/// Get the basic-ranges and make sure the result is available.
/// Get basic-ranges and make sure the result is available.
const Ranges& fetch();
/// Determine whether the basic-ranges data is available.
@ -88,9 +68,6 @@ private:
/// Get static singleton variable.
static Ranges& get_ranges();
/// Search and sort all possible basic-ranges permutations.
static void build_ranges(Ranges &ranges);
KLSK_INSTANCE(BasicRanges)
};
@ -98,11 +75,11 @@ private:
class AllCases {
public:
/// Execute the build process and ensure thread safety.
/// Execute the build process.
void build();
/// Execute the build process in parallel without blocking.
void build_parallel_async(Executor &&executor, Notifier &&callback);
/// Execute the build process without blocking.
void build_async(Executor &&executor, Notifier &&callback);
/// Get all-cases and make sure the result is available.
const RangesUnion& fetch();

2
src/core/all_cases/internal/all_cases.cc

@ -100,7 +100,7 @@ void AllCases::build() {
available_ = true;
}
void AllCases::build_parallel_async(Executor &&executor, Notifier &&callback) {
void AllCases::build_async(Executor &&executor, Notifier &&callback) {
if (available_) {
callback();
return; // reduce consumption of mutex

143
src/core/all_cases/internal/basic_ranges.cc

@ -1,21 +1,23 @@
#include <list>
#include <algorithm>
#include "ranges/ranges.h"
#include "group/group.h"
#include "all_cases/all_cases.h"
using klotski::cases::Ranges;
using klotski::cases::BasicRanges;
using klotski::cases::TYPE_ID_LIMIT;
typedef Ranges::iterator RangesIter;
typedef std::tuple<int, int, int> RangeType;
typedef std::array<RangeType, 203> RangeTypeUnion;
typedef std::array<RangeType, TYPE_ID_LIMIT> RangeTypeUnion;
/// Generate all possible basic-ranges permutations.
consteval static RangeTypeUnion range_types() {
RangeTypeUnion data;
for (int i = 0, n = 0; n <= 7; ++n) { // 1x2 + 2x1 -> 0 ~ 7
for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) { // 2x1 -> 0 ~ n
if (n == 7 && n_2x1 == 7) {
if (n_2x1 == 7) {
break;
}
for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) { // 1x1 -> 0 ~ (14 - 2n)
@ -27,8 +29,8 @@ consteval static RangeTypeUnion range_types() {
}
/// Combine two consecutive sorted arrays into one sorted array.
static void inplace_merge(Ranges::iterator begin, Ranges::iterator mid, const Ranges::iterator end) {
std::vector<uint32_t> tmp = {begin, mid}; // left array backup
static void inplace_merge(RangesIter begin, RangesIter mid, const RangesIter end) {
std::vector<uint32_t> tmp {begin, mid}; // left array backup
for (auto p = tmp.begin();;) {
if (*p <= *mid) {
*(begin++) = *(p++); // stored in original span
@ -44,88 +46,103 @@ static void inplace_merge(Ranges::iterator begin, Ranges::iterator mid, const Ra
}
}
void BasicRanges::build_ranges(Ranges &ranges) {
ranges.clear();
ranges.reserve(BASIC_RANGES_NUM);
void BasicRanges::build() {
if (available_) {
return; // reduce consumption of mutex
}
std::lock_guard guard {building_};
if (available_) {
return; // data is already available
}
std::list flags {ranges.begin()}; // TODO: flags can be constexpr
auto &ranges = get_ranges();
ranges.clear();
ranges.reserve(BASIC_RANGES_NUM_);
for (auto [n, n_2x1, n_1x1] : range_types()) {
ranges.spawn(n, n_2x1, n_1x1);
flags.emplace_back(ranges.end()); // mark ordered interval
}
std::list<RangesIter> points; // mark ordered interval
for (const auto offset : to_offset(BASIC_RANGES_NUM, 0)) {
points.emplace_back(ranges.begin() + offset);
}
points.emplace_back(ranges.end());
do {
decltype(flags.begin()) begin = flags.begin(), mid, end;
while (++(mid = begin) != flags.end() && ++(end = mid) != flags.end()) {
decltype(points)::iterator begin = points.begin(), mid, end;
while (++(mid = begin) != points.end() && ++(end = mid) != points.end()) {
inplace_merge(*begin, *mid, *end); // merge two ordered interval
flags.erase(mid);
points.erase(mid);
begin = end;
}
} while (flags.size() > 2); // merge until only one interval remains
}
void do_sort(klotski::Executor &&executor, klotski::Notifier notifier, std::shared_ptr<std::list<Ranges::iterator>> flags) {
klotski::Worker worker {std::move(executor)};
decltype(flags->begin()) begin = flags->begin(), mid, end;
while (++(mid = begin) != flags->end() && ++(end = mid) != flags->end()) {
worker.post([begin = *begin, mid = *mid, end = *end]() {
inplace_merge(begin, mid, end); // merge two ordered interval
});
flags->erase(mid);
begin = end;
}
worker.then([flags, notifier](klotski::Executor &&executor) {
if (flags->size() == 2) {
notifier();
return;
}
do_sort(std::move(executor), notifier, flags);
});
} while (points.size() > 2); // merge until only one interval remains
available_ = true;
}
void BasicRanges::build_async(Executor &&executor, Notifier &&callback) {
if (available_) {
callback();
return; // reduce consumption of mutex
}
building_.lock();
if (available_) {
building_.unlock();
callback();
return; // data is already available
}
// TODO: add mutex protect here
Worker worker {std::move(executor)};
auto cache = std::make_shared<std::array<Ranges, 203>>();
for (uint32_t i = 0; i < 203; ++i) {
auto all_done = [this, callback = std::move(callback)] {
available_ = true;
building_.unlock();
callback();
};
Worker worker {executor};
auto cache = std::make_shared<std::array<Ranges, TYPE_ID_LIMIT>>();
for (uint32_t i = 0; i < TYPE_ID_LIMIT; ++i) {
(*cache)[i].reserve(BASIC_RANGES_NUM[i]);
worker.post([cache, i] {
auto [n, n_2x1, n_1x1] = range_types()[i];
cache->operator[](i).spawn(n, n_2x1, n_1x1);
(*cache)[i].spawn(n, n_2x1, n_1x1);
});
}
// auto all_done = std::make_shared<Notifier>(std::move(callback));
worker.then([cache, this, callback](Executor &&executor) {
worker.then([cache, all_done = std::move(all_done), executor = std::move(executor)] mutable {
auto &ranges = get_ranges();
ranges.clear();
ranges.reserve(BASIC_RANGES_NUM);
const auto flags = std::make_shared<std::list<Ranges::iterator>>();
flags->emplace_back(ranges.end());
for (auto &tmp : *cache) {
ranges.reserve(BASIC_RANGES_NUM_);
for (auto &&tmp : *cache) {
ranges.insert(ranges.end(), tmp.begin(), tmp.end());
flags->emplace_back(ranges.end()); // mark ordered interval
}
do_sort(std::move(executor), callback, flags);
available_ = true;
auto points = std::make_shared<std::list<RangesIter>>(); // mark ordered interval
for (const auto offset : to_offset(BASIC_RANGES_NUM, 0)) {
points->emplace_back(ranges.begin() + offset);
}
points->emplace_back(ranges.end());
auto inner_sort = [points, all_done, executor = std::move(executor)](auto &&self) -> void {
Worker sorter {executor};
auto begin = points->begin();
decltype(begin) mid, end;
while (++(mid = begin) != points->end() && ++(end = mid) != points->end()) {
sorter.post([begin = *begin, mid = *mid, end = *end] {
inplace_merge(begin, mid, end); // merge two ordered interval
});
points->erase(mid);
begin = end;
}
sorter.then([self, points, all_done] {
if (points->size() == 2) {
all_done();
return;
}
self(self); // next sort round
});
};
inner_sort(inner_sort); // TODO: using `this auto &&self` in new version
});
}

11
src/core/all_cases/internal/basic_ranges.inl

@ -14,17 +14,6 @@ inline const Ranges& BasicRanges::fetch() {
return get_ranges();
}
inline void BasicRanges::build() {
if (available_) {
return; // reduce consumption of mutex
}
std::lock_guard guard {building_};
if (!available_) {
build_ranges(get_ranges());
available_ = true;
}
}
inline bool BasicRanges::is_available() const {
return available_; // no mutex required in one-way state
}

57
src/core/all_cases/internal/constant.inl

@ -0,0 +1,57 @@
#pragma once
#include <array>
namespace klotski::cases {
// ------------------------------------------------------------------------------------- //
/// Case counts split into 16 slots (every fourth slot is empty).
/// NOTE(review): presumably one entry per `RangesUnion` slot -- confirm against callers.
/// Declared `inline` so that every translation unit including this header shares a
/// single definition instead of carrying its own internal-linkage copy.
inline constexpr auto ALL_CASES_NUM = std::to_array({
2942906, 2260392, 2942906, 0,
2322050, 1876945, 2322050, 0,
2322050, 1876945, 2322050, 0,
2942906, 2260392, 2942906, 0,
});
static_assert(ALL_CASES_NUM.size() == 16);
/// Sum of all entries in `ALL_CASES_NUM`.
inline constexpr auto ALL_CASES_NUM_ = array_sum(ALL_CASES_NUM);
// ------------------------------------------------------------------------------------- //
/// Element counts of the 203 basic-ranges segments, one entry per segment.
/// Declared `inline` for the same single-definition reason as `ALL_CASES_NUM`.
inline constexpr auto BASIC_RANGES_NUM = std::to_array({
1 , 16 , 120 , 560 , 1820 , 4368 , 8008 , 11440 ,
12870 , 11440 , 8008 , 4368 , 1820 , 560 , 120 , 15 ,
210 , 1365 , 5460 , 15015 , 30030, 45045, 51480 , 45045 ,
30030 , 15015 , 5460 , 1365 , 15 , 210 , 1365 , 5460 ,
15015 , 30030 , 45045 , 51480 , 45045, 30030, 15015 , 5460 ,
1365 , 91 , 1092 , 6006 , 20020, 45045, 72072 , 84084 ,
72072 , 45045 , 20020 , 6006 , 182 , 2184 , 12012 , 40040 ,
90090 , 144144, 168168, 144144, 90090, 40040, 12012 , 91 ,
1092 , 6006 , 20020 , 45045 , 72072, 84084, 72072 , 45045 ,
20020 , 6006 , 286 , 2860 , 12870, 34320, 60060 , 72072 ,
60060 , 34320 , 12870 , 858 , 8580 , 38610, 102960, 180180,
216216, 180180, 102960, 38610 , 858 , 8580 , 38610 , 102960,
180180, 216216, 180180, 102960, 38610, 286 , 2860 , 12870 ,
34320 , 60060 , 72072 , 60060 , 34320, 12870, 495 , 3960 ,
13860 , 27720 , 34650 , 27720 , 13860, 1980 , 15840 , 55440 ,
110880, 138600, 110880, 55440 , 2970 , 23760, 83160 , 166320,
207900, 166320, 83160 , 1980 , 15840, 55440, 110880, 138600,
110880, 55440 , 495 , 3960 , 13860, 27720, 34650 , 27720 ,
13860 , 462 , 2772 , 6930 , 9240 , 6930 , 2310 , 13860 ,
34650 , 46200 , 34650 , 4620 , 27720, 69300, 92400 , 69300 ,
4620 , 27720 , 69300 , 92400 , 69300, 2310 , 13860 , 34650 ,
46200 , 34650 , 462 , 2772 , 6930 , 9240 , 6930 , 210 ,
840 , 1260 , 1260 , 5040 , 7560 , 3150 , 12600 , 18900 ,
4200 , 16800 , 25200 , 3150 , 12600, 18900, 1260 , 5040 ,
7560 , 210 , 840 , 1260 , 36 , 252 , 756 , 1260 ,
1260 , 756 , 252 ,
});
static_assert(BASIC_RANGES_NUM.size() == 203);
/// Sum of all entries in `BASIC_RANGES_NUM`.
inline constexpr auto BASIC_RANGES_NUM_ = array_sum(BASIC_RANGES_NUM);
// ------------------------------------------------------------------------------------- //
} // namespace klotski::cases

13
src/core/benchmark/group.cc

@ -172,14 +172,17 @@ static void OriginBasicRanges(benchmark::State &state) {
for (auto _ : state) {
auto &kk = klotski::cases::BasicRanges::instance();
kk.build_ranges(kk.get_ranges());
// kk.build_ranges(kk.get_ranges());
kk.available_ = false;
// kk.build();
// kk.build_async([](auto func) {func();}, [](){});
// kk.build_async([&pool](auto func) {
// pool.submit_task(func);
// }, [] {});
// pool.wait();
kk.build_async([&pool](auto func) {
pool.submit_task(func);
}, [] {});
pool.wait();
}
}

43
src/core/main.cc

@ -38,45 +38,32 @@ int main() {
BS::thread_pool pool {};
// auto demo = [](auto &&self, int val) {
// std::cout << "val = " << val << std::endl;
// if (val == 0) {
// return;
// }
// self(self, val - 1);
// };
//
// demo(demo, 5);
// constexpr std::array<int, 5> kk {1, 2, 3, 4, 5};
// auto ret = klotski::to_offset(kk, 0);
// std::cout << std::format("{}", ret) << std::endl;
// klotski::cases::BasicRanges::instance().build();
klotski::cases::BasicRanges::instance().build_async([&pool](auto &&func) {
pool.submit_task(func);
}, [] {
std::cout << "all done" << std::endl;
// std::cout << "all done" << std::endl;
});
// klotski::cases::BasicRanges::instance().build();
//
// klotski::cases::AllCases::instance().build_parallel_async([&pool](auto func) {
// pool.submit_task(func);
// }, [] {});
// std::cout << "start call" << std::endl;
// klotski::Notifier kk {};
// kk();
// std::cout << "end call" << std::endl;
// {
// klotski::Worker worker {[&pool](auto &&func) { pool.submit_task(func); }};
//
// for (int i = 1; i < 3; ++i) {
// worker.post([i] {
// std::cout << std::format("task {} begin\n", i);
// std::this_thread::sleep_for(std::chrono::seconds(i));
// std::cout << std::format("task {} complete\n", i);
// });
// }
//
// worker.then([](klotski::Executor &&executor){
// std::cout << "all tasks done\n";
// });
//
// std::cout << "worker start release\n";
// }
//
// std::cout << "block exit\n";
pool.wait();
// std::cout << BasicRanges::instance().fetch().size() << std::endl;

2
src/core/ranges/ranges.h

@ -25,6 +25,8 @@ void derive_demo(const std::vector<uint32_t> &range, const std::vector<uint32_t>
void derive_demo_pro(const BidiRanges &bidi_range, std::vector<uint32_t> &output, int head);
typedef std::array<Ranges, 16> RangesUnion;
// TODO: add RangesUnion here
// TODO: -> spawn from Ranges / export std::vector<CommonCode>

50
src/core/utils/utility.h

@ -2,6 +2,7 @@
#include <bit>
#include <list>
#include <numeric>
#include <functional>
/// Mark target class as a singleton.
@ -26,23 +27,33 @@
namespace klotski {
/// Get the number of consecutive `0` in the low bits.
// inline int low_zero_num(const uint32_t bin) {
// return __builtin_ctzl(bin);
//
// // TODO: using (bin ^ (bin - 1)) when non-builtin
//
// // WARN: be aware of serious performance issues
// // return __builtin_popcount(~(bin ^ -bin)) - 1;
// }
/// Get the number of consecutive `0` in the low bits.
// inline int low_zero_num(const uint64_t bin) {
// return __builtin_ctzll(bin);
//
// // WARN: be aware of serious performance issues
// // return __builtin_popcount(~(bin ^ -bin)) - 1;
// }
/// Constraint satisfied by any type `T` for which `a + b` is a valid expression
/// on two values of type `T`; the type of the result is not checked.
template <typename T>
concept Addable = requires(T a, T b) { a + b; };
/// Add up every element of `arr` during constant evaluation.
///
/// The running total is kept in an `int` regardless of `T`, so the result is
/// only exact when every partial sum is representable as `int`.
template <Addable T, size_t N>
consteval int array_sum(const std::array<T, N> &arr) {
    int total = 0;
    for (const auto &item : arr) {
        total = total + item; // same `int + T -> int` step std::accumulate performs
    }
    return total;
}
/// Convert an array of segment sizes into the start offset of each segment.
///
/// `offset[0]` is `base`, and `offset[i]` is `base + arr[0] + ... + arr[i - 1]`.
/// The last element of `arr` is never added, because it only determines where
/// the final segment ends, not where any segment starts.
///
/// @param arr   sizes of the consecutive segments (must be non-empty)
/// @param base  offset of the first segment
/// @return      start offset of every segment, in the same order as `arr`
template <Addable T, size_t N>
consteval std::array<T, N> to_offset(const std::array<T, N> &arr, T base) {
    static_assert(N > 0);
    std::array<T, N> offset {};
    T val = base;
    offset[0] = val; // first segment starts at `base`, consistent with the later entries
    for (size_t i = 0; i + 1 < N; ++i) { // unsigned index: no signed/unsigned comparison
        val += arr[i];
        offset[i + 1] = val;
    }
    return offset;
}
/// Flips the input u32 every two bits in low-high symmetry.
inline uint32_t range_reverse(uint32_t bin) {
@ -69,16 +80,15 @@ typedef std::function<void(std::function<void()> &&)> Executor;
class Worker final {
public:
using Task = std::function<void()>;
using After = std::function<void(Executor &&)>;
/// Construction based on executor.
explicit Worker(Executor &&executor);
explicit Worker(Executor executor);
/// Post new task into the queue.
void post(Task &&task);
/// Setting up callback entry.
void then(After &&after);
void then(Notifier &&after);
/// Tasks will be triggered at destruction.
~Worker();

10
src/core/utils/worker.inl

@ -4,16 +4,16 @@
namespace klotski {
inline Worker::Worker(Executor &&executor)
: after_([] {}), executor_(executor) {}
inline Worker::Worker(Executor executor)
: after_([] {}), executor_(std::move(executor)) {}
inline void Worker::post(Task &&task) {
tasks_.emplace_back(std::move(task));
}
inline void Worker::then(After &&after) {
after_ = [after = std::move(after), executor = executor_]() mutable {
after(std::move(executor));
inline void Worker::then(Notifier &&after) {
after_ = [after = std::move(after)]() {
after();
};
}

Loading…
Cancel
Save