Browse Source

perf: update AllCases module

master
Dnomd343 1 month ago
parent
commit
265eb230f4
  1. 2
      src/core/CMakeLists.txt
  2. 16
      src/core/all_cases/all_cases.h
  3. 131
      src/core/all_cases/internal/all_cases.cc
  4. 113
      src/core/all_cases/internal/derive.cc
  5. 16
      src/core/benchmark/group.cc
  6. 36
      src/core/main.cc
  7. 14
      src/core_ffi/c_ffi/all_cases.cc
  8. 26
      src/core_test/cases/all_cases.cc

2
src/core/CMakeLists.txt

@ -6,7 +6,7 @@ set(CMAKE_CXX_STANDARD 23)
set(KLOTSKI_CORE_SRC set(KLOTSKI_CORE_SRC
all_cases/internal/basic_ranges.cc all_cases/internal/basic_ranges.cc
all_cases/internal/all_cases.cc all_cases/internal/all_cases.cc
all_cases/internal/derive.cc # all_cases/internal/derive.cc
common_code/internal/common_code.cc common_code/internal/common_code.cc
common_code/internal/serialize.cc common_code/internal/serialize.cc

16
src/core/all_cases/all_cases.h

@ -106,7 +106,7 @@ public:
/// TODO: remove this interface /// TODO: remove this interface
/// Execute the build process with parallel support and ensure thread safety. /// Execute the build process with parallel support and ensure thread safety.
void build_parallel(Executor &&executor); // void build_parallel(Executor &&executor);
/// Execute the build process in parallel without blocking. /// Execute the build process in parallel without blocking.
void build_parallel_async(Executor &&executor, Notifier &&callback); void build_parallel_async(Executor &&executor, Notifier &&callback);
@ -127,20 +127,6 @@ private:
KLSK_INSTANCE(AllCases) KLSK_INSTANCE(AllCases)
}; };
/// Return the basic ranges with `range_reverse` applied to every element.
///
/// The table is built once, on the first call, from a copy of
/// `BasicRanges::instance().fetch()`; later calls return the same object.
inline const std::vector<uint32_t>& get_reversed() {
    static const std::vector<uint32_t> cache = [] {
        std::vector<uint32_t> flipped {BasicRanges::instance().fetch()}; // private copy
        for (auto it = flipped.begin(); it != flipped.end(); ++it) {
            *it = range_reverse(*it);
        }
        return flipped;
    }();
    return cache;
}
void global_derive(const std::vector<uint32_t> &range, std::vector<uint32_t> &output, int head);
void global_derive_pro(const std::vector<uint32_t> &range, const std::vector<uint32_t> &reversed, std::vector<uint32_t> &output, int head);
// ------------------------------------------------------------------------------------- // // ------------------------------------------------------------------------------------- //
} // namespace klotski::cases } // namespace klotski::cases

131
src/core/all_cases/internal/all_cases.cc

@ -1,14 +1,17 @@
#include <future> #include <future>
#include "utils/utility.h"
#include "ranges/ranges.h"
#include "all_cases/all_cases.h" #include "all_cases/all_cases.h"
using klotski::range_reverse;
using klotski::cases::Ranges; using klotski::cases::Ranges;
using klotski::cases::AllCases; using klotski::cases::AllCases;
using klotski::cases::BasicRanges; using klotski::cases::BasicRanges;
using klotski::cases::ALL_CASES_NUM; using klotski::cases::ALL_CASES_NUM;
/// Generate all possible klotski heads. /// Generate all possible klotski heads.
consteval static std::array<int, 12> heads() { static consteval std::array<int, 12> heads() {
std::array<int, 12> heads {}; std::array<int, 12> heads {};
for (int i = 0, head = 0; head < 15; ++head) { for (int i = 0, head = 0; head < 15; ++head) {
if (head % 4 != 3) { if (head % 4 != 3) {
@ -18,72 +21,81 @@ consteval static std::array<int, 12> heads() {
return heads; return heads;
} }
/// Check whether the combination of head and range is valid.
///
/// `range` is consumed two bits at a time, lowest pair first, until no set
/// bits remain. Each pair selects a block kind: 00 space, 11 1x1, 01 1x2,
/// 10 2x1. `occupied` is a bitmap of cells that are already taken, shifted so
/// that bit 0 is always the first free cell; `addr` is that cell's index.
/// NOTE(review): the `addr % 4` / `addr > 15` tests suggest a 4-column,
/// 5-row board — confirm against the board definition.
///
/// @param head  Left shift applied to the 2x2 block mask `0b110011`.
/// @param range Packed 2-bit block sequence to validate.
/// @return 0 when every pair can be placed, otherwise the 1-based position of
///         the first pair that cannot be placed.
static int check_range(const int head, uint32_t range) {
    uint32_t occupied = 0b110011 << head; // cells taken by the 2x2 block
    int addr = 0;
    int offset = 1;
    while (range != 0) {
        // skip the run of already-filled cells so bit 0 is the next free one
        const auto filled = std::countr_one(occupied);
        addr += filled;
        occupied >>= filled;

        const uint32_t block = range & 0b11;
        if (block == 0b01) { // 1x2 block: needs bit 1 free and not the last column
            if ((occupied & 0b10) != 0 || addr % 4 == 3) {
                return offset; // broken offset
            }
            occupied |= 0b11;
        } else if (block == 0b10) { // 2x1 block: needs bit 4 free and addr <= 15
            if ((occupied & 0b10000) != 0 || addr > 15) {
                return offset; // broken offset
            }
            occupied |= 0b10001;
        } else { // space (00) or 1x1 block (11): takes a single cell
            occupied |= 0b1;
        }

        range >>= 2;
        ++offset;
    }
    return 0; // pass check
}
/// Build all valid ranges of the specified head. /// Build all valid ranges of the specified head.
static void build_cases(const int head, Ranges &release) { static void build_cases(const std::vector<uint32_t> &ranges,
const std::vector<uint32_t> &reversed, Ranges &release, const int head) {
release.clear(); release.clear();
release.reserve(ALL_CASES_NUM[head]); release.reserve(ALL_CASES_NUM[head]);
// klotski::cases::global_derive(BasicRanges::instance().fetch(), release, head); for (uint32_t index = 0; index < reversed.size(); ++index) {
klotski::cases::global_derive_pro(BasicRanges::instance().fetch(), klotski::cases::get_reversed(), release, head); if (const auto offset = check_range(head, reversed[index])) { // invalid case
if (offset > 14) {
// BasicRanges::instance().fetch().derive(head, release); continue;
} }
void AllCases::build() {
// TODO: lock here
// klotski::cases::get_reversed();
std::vector<uint32_t> reversed {BasicRanges::instance().fetch()};
for (auto &x : reversed) {
x = range_reverse(x);
}
// std::vector<uint32_t> reversed;
// std::ranges::transform(BasicRanges::instance().fetch(), std::back_inserter(reversed), [](uint32_t x) { return range_reverse(x); });
// auto &reversed = get_reversed();
for (auto head : heads()) {
// build_cases(head, get_cases()[head]);
auto &release = get_cases()[head];
release.clear();
release.reserve(ALL_CASES_NUM[head]);
// klotski::cases::global_derive(BasicRanges::instance().fetch(), release, head); // !! <- broken
// klotski::cases::global_derive_pro(BasicRanges::instance().fetch(), reversed, release, head); // ( xx xx xx ) xx xx xx ... [range]
klotski::cases::global_derive_pro(reversed, BasicRanges::instance().fetch(), release, head); // +1 00 00 00 ... (delta)
const int tmp = (16 - offset) * 2;
uint32_t min_next = ((ranges[index] >> tmp) + 1) << tmp; // next possible range
if (offset > 5) { // located next range by min_next
while (ranges[++index] < min_next) {}
} else {
index = std::lower_bound(ranges.begin() + index, ranges.end(), min_next) - ranges.begin();
}
--index;
continue;
}
release.emplace_back(range_reverse(reversed[index])); // release valid case
} }
available_ = true;
// build_parallel([](auto &&func) {
// func();
// });
} }
void AllCases::build_parallel(Executor &&executor) { void AllCases::build() {
if (available_) { if (available_) {
return; // reduce consumption of mutex return; // reduce consumption of mutex
} }
std::lock_guard<std::mutex> guard(building_);
std::lock_guard guard {building_};
if (available_) { if (available_) {
return; // data is already available return; // data is already available
} }
std::vector<std::future<void>> futures;
for (auto head : heads()) { const auto &ranges = BasicRanges::instance().fetch();
auto promise = std::make_shared<std::promise<void>>(); std::vector reversed {ranges};
futures.emplace_back(promise->get_future()); for (auto &x : reversed) {
executor([head, promise = std::move(promise)]() { x = range_reverse(x);
build_cases(head, get_cases()[head]);
promise->set_value(); // subtask completed notification
});
} }
for (auto &x : futures) { for (const auto head : heads()) {
x.get(); // wait until all subtasks completed build_cases(ranges, reversed, get_cases()[head], head);
} }
available_ = true; available_ = true;
} }
@ -93,17 +105,24 @@ void AllCases::build_parallel_async(Executor &&executor, Notifier &&callback) {
callback(); callback();
return; // reduce consumption of mutex return; // reduce consumption of mutex
} }
building_.lock(); building_.lock();
if (available_) { if (available_) {
building_.unlock(); building_.unlock();
callback(); callback();
return; // data is already available return; // data is already available
} }
auto counter = std::make_shared<std::atomic<int>>(0);
auto all_done = std::make_shared<Notifier>(std::move(callback)); const auto counter = std::make_shared<std::atomic<int>>(0);
for (auto head : heads()) { const auto all_done = std::make_shared<Notifier>(std::move(callback));
executor([this, head, counter, all_done]() { const auto reversed = std::make_shared<std::vector<uint32_t>>(BasicRanges::instance().fetch());
build_cases(head, get_cases()[head]); for (auto &x : *reversed) {
x = range_reverse(x);
}
for (const auto head : heads()) {
executor([=, this] {
build_cases(BasicRanges::instance().fetch(), *reversed, get_cases()[head], head);
if (counter->fetch_add(1) == heads().size() - 1) { // all tasks done if (counter->fetch_add(1) == heads().size() - 1) { // all tasks done
available_ = true; available_ = true;
building_.unlock(); // release building mutex building_.unlock(); // release building mutex

113
src/core/all_cases/internal/derive.cc

@ -1,113 +0,0 @@
#include "utils/utility.h"
#include "ranges/ranges.h"
#include "all_cases/all_cases.h"
using klotski::cases::Ranges;
/// Check whether the combination of head and range is valid.
///
/// `range` is consumed two bits at a time, lowest pair first, until no set
/// bits remain. Each pair selects a block kind: 00 space, 11 1x1, 01 1x2,
/// 10 2x1. `occupied` is a bitmap of cells that are already taken, shifted so
/// that bit 0 is always the first free cell; `addr` is that cell's index.
/// NOTE(review): the `addr % 4` / `addr > 15` tests suggest a 4-column,
/// 5-row board — confirm against the board definition.
///
/// @param head  Left shift applied to the 2x2 block mask `0b110011`.
/// @param range Packed 2-bit block sequence to validate.
/// @return 0 when every pair can be placed, otherwise the 1-based position of
///         the first pair that cannot be placed.
static int check_range(const int head, uint32_t range) {
    uint32_t occupied = 0b110011 << head; // cells taken by the 2x2 block
    int addr = 0;
    int offset = 1;
    while (range != 0) {
        // skip the run of already-filled cells so bit 0 is the next free one
        const auto filled = std::countr_one(occupied);
        addr += filled;
        occupied >>= filled;

        const uint32_t block = range & 0b11;
        if (block == 0b01) { // 1x2 block: needs bit 1 free and not the last column
            if ((occupied & 0b10) != 0 || addr % 4 == 3) {
                return offset; // broken offset
            }
            occupied |= 0b11;
        } else if (block == 0b10) { // 2x1 block: needs bit 4 free and addr <= 15
            if ((occupied & 0b10000) != 0 || addr > 15) {
                return offset; // broken offset
            }
            occupied |= 0b10001;
        } else { // space (00) or 1x1 block (11): takes a single cell
            occupied |= 0b1;
        }

        range >>= 2;
        ++offset;
    }
    return 0; // pass check
}
/// Append to `output` the `range_reverse` of every entry of `range` that
/// passes `check_range` for the given `head`.
///
/// When an entry fails at pair position `offset`, later entries that share
/// the failing leading pairs are skipped: `min_next` is the smallest reversed
/// value whose leading `offset` pairs differ, and the scan jumps to the first
/// entry whose reversed value reaches it.
/// NOTE(review): the skip assumes `range_reverse(range[i])` is non-decreasing
/// in `i` — confirm against the caller.
///
/// @param range  Entries handed to `check_range`.
/// @param output Receives `range_reverse(range[i])` for each valid entry;
///               existing contents are kept.
/// @param head   Head position forwarded to `check_range`.
void klotski::cases::global_derive(const std::vector<uint32_t> &range, std::vector<uint32_t> &output, int head) {
    const auto total = range.size();
    for (uint32_t index = 0; index < total; ++index) {
        if (const auto offset = check_range(head, range[index])) { // invalid case
            const int left_offset = (16 - offset) * 2;
            ///   ( xx xx xx ) xx xx xx ... [reversed range]
            ///            +1  00 00 00 ... (delta)
            const uint32_t min_next = ((range_reverse(range[index]) >> left_offset) + 1) << left_offset;
            // Locate the next candidate. The bound check stops the scan at the
            // end of `range` when no remaining entry reaches `min_next`
            // (previously this read past the last element).
            while (++index < total && range_reverse(range[index]) < min_next) {}
            --index; // the loop header's ++index lands on the located entry
            // TODO: maybe using binary search here
            continue;
        }
        output.emplace_back(range_reverse(range[index])); // release valid case
    }
}
/// Append to `output` the `range_reverse` of every entry of `range` that
/// passes `check_range` for the given `head`, using the parallel table
/// `reversed` to skip runs of entries that must fail for the same reason.
///
/// When `range[index]` fails at pair position `offset`, `min_next` is the
/// smallest value whose leading `offset` pairs differ from those of
/// `reversed[index]`; the scan jumps to the first `reversed` entry that
/// reaches it.
/// `reversed` must be sorted ascending from `index` onwards, because it is
/// searched with `std::lower_bound`.
/// NOTE(review): assumes `reversed.size() == range.size()` and that
/// `reversed[i]` corresponds to `range[i]` — confirm against the caller.
///
/// @param range    Entries handed to `check_range`.
/// @param reversed Values compared against `min_next` while skipping.
/// @param output   Receives `range_reverse(range[i])` for each valid entry;
///                 existing contents are kept.
/// @param head     Head position forwarded to `check_range`.
void klotski::cases::global_derive_pro(const std::vector<uint32_t> &range, const std::vector<uint32_t> &reversed, std::vector<uint32_t> &output, int head) {
    for (uint32_t index = 0; index < range.size(); ++index) {
        if (const auto offset = check_range(head, range[index])) { // invalid case
            if (offset > 14) {
                continue; // no skip attempted; just advance to the next entry
            }
            const int left_offset = (16 - offset) * 2;
            ///   ( xx xx xx ) xx xx xx ... [reversed range]
            ///            +1  00 00 00 ... (delta)
            const uint32_t min_next = ((reversed[index] >> left_offset) + 1) << left_offset;
            if (offset > 5) {
                // Linear scan. The bound check stops at the end of `reversed`
                // when no remaining entry reaches `min_next` (previously this
                // read past the last element).
                while (++index < reversed.size() && reversed[index] < min_next) {}
                --index; // the loop header's ++index lands on the located entry
            } else {
                // Binary search for the first entry that reaches `min_next`.
                const auto begin = reversed.begin() + index;
                const auto skipped = std::lower_bound(begin, reversed.end(), min_next) - begin;
                index += skipped - 1; // -1 compensates for the loop header's ++index
            }
            continue;
        }
        output.emplace_back(range_reverse(range[index])); // release valid case
    }
}

16
src/core/benchmark/group.cc

@ -7,6 +7,7 @@
#include <ranges/ranges.h> #include <ranges/ranges.h>
#include "../../../third_party/thread-pool/include/BS_thread_pool.hpp"
#include "all_cases/all_cases.h" #include "all_cases/all_cases.h"
#undef private #undef private
@ -175,12 +176,25 @@ static void OriginBasicRanges(benchmark::State &state) {
static void OriginAllCases(benchmark::State &state) { static void OriginAllCases(benchmark::State &state) {
klotski::cases::BasicRanges::instance().build(); klotski::cases::BasicRanges::instance().build();
klotski::cases::get_reversed(); // klotski::cases::get_reversed();
// BS::thread_pool pool {4};
for (auto _ : state) { for (auto _ : state) {
auto &pp = klotski::cases::AllCases::instance(); auto &pp = klotski::cases::AllCases::instance();
pp.available_ = false; pp.available_ = false;
pp.build(); pp.build();
// pp.build_parallel_async([](auto func) {func();}, []() {
// // std::cout << "hello" << std::endl;
// });
// pp.build_parallel_async([&pool](auto func) {
// pool.submit_task(func);
// }, [] {});
//
// pool.wait();
} }
} }

36
src/core/main.cc

@ -12,6 +12,8 @@
#include "short_code/short_code.h" #include "short_code/short_code.h"
#include "common_code/common_code.h" #include "common_code/common_code.h"
#include "../../third_party/thread-pool/include/BS_thread_pool.hpp"
using klotski::core::Core; using klotski::core::Core;
using klotski::cases::AllCases; using klotski::cases::AllCases;
@ -25,35 +27,21 @@ using klotski::cases::GroupUnion;
using klotski::codec::SHORT_CODE_LIMIT; using klotski::codec::SHORT_CODE_LIMIT;
int main() { int main() {
const auto start = clock(); // const auto start = clock();
auto &basic_ranges = klotski::cases::BasicRanges::instance().fetch();
klotski::cases::Ranges flip {basic_ranges};
for (auto &x : flip) {
x = klotski::range_reverse(x);
}
klotski::cases::Ranges results; const auto start = std::chrono::system_clock::now();
results.reserve(klotski::cases::ALL_CASES_NUM_);
klotski::cases::derive_demo(basic_ranges, flip, results, 0); BS::thread_pool pool {};
klotski::cases::derive_demo(basic_ranges, flip, results, 1);
klotski::cases::derive_demo(basic_ranges, flip, results, 2);
klotski::cases::derive_demo(basic_ranges, flip, results, 4); klotski::cases::BasicRanges::instance().build();
klotski::cases::derive_demo(basic_ranges, flip, results, 5);
klotski::cases::derive_demo(basic_ranges, flip, results, 6);
klotski::cases::derive_demo(basic_ranges, flip, results, 8); klotski::cases::AllCases::instance().build_parallel_async([&pool](auto func) {
klotski::cases::derive_demo(basic_ranges, flip, results, 9); pool.submit_task(func);
klotski::cases::derive_demo(basic_ranges, flip, results, 10); }, [] {});
klotski::cases::derive_demo(basic_ranges, flip, results, 12); pool.wait();
klotski::cases::derive_demo(basic_ranges, flip, results, 13);
klotski::cases::derive_demo(basic_ranges, flip, results, 14);
// std::cout << results.size() << " vs " << klotski::cases::ALL_CASES_NUM_ << std::endl; std::cerr << std::chrono::system_clock::now() - start << std::endl;
// auto raw_code = RawCode::from_common_code(0x1A9BF0C00)->unwrap(); // auto raw_code = RawCode::from_common_code(0x1A9BF0C00)->unwrap();
// auto ret = klotski::cases::group_extend_from_seed(raw_code); // auto ret = klotski::cases::group_extend_from_seed(raw_code);
@ -92,7 +80,7 @@ int main() {
// std::cout << "----" << std::endl; // std::cout << "----" << std::endl;
// } // }
std::cerr << ((clock() - start) * 1000 / CLOCKS_PER_SEC) << "ms" << std::endl; // std::cerr << ((clock() - start) * 1000 / CLOCKS_PER_SEC) << "ms" << std::endl;
return 0; return 0;
} }

14
src/core_ffi/c_ffi/all_cases.cc

@ -40,13 +40,13 @@ void all_cases_build_async(const executor_t executor, const notifier_t callback)
} }
void all_cases_build_parallel(executor_t executor) { void all_cases_build_parallel(executor_t executor) {
AllCases::instance().build_parallel([executor](Runner &&runner) { // AllCases::instance().build_parallel([executor](Runner &&runner) {
const auto func = [](void *arg) { // const auto func = [](void *arg) {
(*static_cast<Runner*>(arg))(); // (*static_cast<Runner*>(arg))();
delete static_cast<Runner*>(arg); // delete static_cast<Runner*>(arg);
}; // };
executor(func, new Runner {std::move(runner)}); // executor(func, new Runner {std::move(runner)});
}); // });
} }
void all_cases_build_parallel_async(executor_t executor, notifier_t callback) { void all_cases_build_parallel_async(executor_t executor, notifier_t callback) {

26
src/core_test/cases/all_cases.cc

@ -89,23 +89,23 @@ TEST_FF(AllCases, all_cases_race) {
} }
TEST_FF(AllCases, all_cases_parallel) { TEST_FF(AllCases, all_cases_parallel) {
AllCases::instance().build_parallel(executor_.Entry()); // AllCases::instance().build_parallel(executor_.Entry());
EXPECT_TRUE(Available()); // EXPECT_TRUE(Available());
Verify(); // Verify();
AllCases::instance().build_parallel(executor_.Entry()); // AllCases::instance().build_parallel(executor_.Entry());
EXPECT_TRUE(Available()); // EXPECT_TRUE(Available());
Verify(); // Verify();
} }
TEST_FF(AllCases, all_cases_parallel_race) { TEST_FF(AllCases, all_cases_parallel_race) {
racer_.Begin([this] { // racer_.Begin([this] {
AllCases::instance().build_parallel(executor_.Entry()); // AllCases::instance().build_parallel(executor_.Entry());
}); // });
EXPECT_FALSE(Available()); // EXPECT_FALSE(Available());
racer_.Join(); // racer_.Join();
EXPECT_TRUE(Available()); // EXPECT_TRUE(Available());
Verify(); // Verify();
} }
TEST_FF(AllCases, all_cases_async) { TEST_FF(AllCases, all_cases_async) {

Loading…
Cancel
Save