Browse Source

perf: update AllCases module

master
Dnomd343 1 month ago
parent
commit
265eb230f4
  1. 2
      src/core/CMakeLists.txt
  2. 16
      src/core/all_cases/all_cases.h
  3. 131
      src/core/all_cases/internal/all_cases.cc
  4. 113
      src/core/all_cases/internal/derive.cc
  5. 16
      src/core/benchmark/group.cc
  6. 36
      src/core/main.cc
  7. 14
      src/core_ffi/c_ffi/all_cases.cc
  8. 26
      src/core_test/cases/all_cases.cc

2
src/core/CMakeLists.txt

@ -6,7 +6,7 @@ set(CMAKE_CXX_STANDARD 23)
set(KLOTSKI_CORE_SRC
all_cases/internal/basic_ranges.cc
all_cases/internal/all_cases.cc
all_cases/internal/derive.cc
# all_cases/internal/derive.cc
common_code/internal/common_code.cc
common_code/internal/serialize.cc

16
src/core/all_cases/all_cases.h

@ -106,7 +106,7 @@ public:
/// TODO: remove this interface
/// Execute the build process with parallel support and ensure thread safety.
void build_parallel(Executor &&executor);
// void build_parallel(Executor &&executor);
/// Execute the build process in parallel without blocking.
void build_parallel_async(Executor &&executor, Notifier &&callback);
@ -127,20 +127,6 @@ private:
KLSK_INSTANCE(AllCases)
};
/// Return the basic ranges with `range_reverse` applied to every entry.
///
/// The converted list is produced by the initializer of a function-local
/// static on the first call; every call hands back a reference to that
/// same object.
inline const std::vector<uint32_t>& get_reversed() {
    static auto cache = [] {
        // Start from a copy of the basic ranges, then convert it in place.
        std::vector<uint32_t> flipped {BasicRanges::instance().fetch()};
        for (auto it = flipped.begin(); it != flipped.end(); ++it) {
            *it = range_reverse(*it);
        }
        return flipped;
    }();
    return cache;
}
void global_derive(const std::vector<uint32_t> &range, std::vector<uint32_t> &output, int head);
void global_derive_pro(const std::vector<uint32_t> &range, const std::vector<uint32_t> &reversed, std::vector<uint32_t> &output, int head);
// ------------------------------------------------------------------------------------- //
} // namespace klotski::cases

131
src/core/all_cases/internal/all_cases.cc

@ -1,14 +1,17 @@
#include <future>
#include "utils/utility.h"
#include "ranges/ranges.h"
#include "all_cases/all_cases.h"
using klotski::range_reverse;
using klotski::cases::Ranges;
using klotski::cases::AllCases;
using klotski::cases::BasicRanges;
using klotski::cases::ALL_CASES_NUM;
/// Generate all possible klotski heads.
consteval static std::array<int, 12> heads() {
static consteval std::array<int, 12> heads() {
std::array<int, 12> heads {};
for (int i = 0, head = 0; head < 15; ++head) {
if (head % 4 != 3) {
@ -18,72 +21,81 @@ consteval static std::array<int, 12> heads() {
return heads;
}
/// Check whether the combination of head and range is valid.
///
/// Walks `range` two bits at a time, lowest bits first, and tries to place the
/// block encoded by each 2-bit group into the first cell that is still free.
///
/// @param head  Shift applied to the 2x2 block mask `0b110011`.
/// @param range Packed sequence of 2-bit block codes. Scanning stops as soon as
///              the remaining value is zero, so trailing `00` groups are never
///              visited.
/// @return 0 when every visited group could be placed, otherwise the 1-based
///         index of the first 2-bit group that could not be placed.
static int check_range(const int head, uint32_t range) {
uint32_t flags = 0b110011 << head; // fill 2x2 block
// `flags` is an occupancy mask relative to `addr`: after the shift below,
// bit 0 stands for cell `addr`, bit 1 for cell `addr + 1`, bit 4 for `addr + 4`.
for (int addr = 0, offset = 1; range; range >>= 2, ++offset) { // traverse every 2-bit
// Skip the run of already-occupied cells so that bit 0 is the next free cell.
const auto num = std::countr_one(flags);
addr += num; // next unfilled block
flags >>= num;
switch (range & 0b11) {
case 0b00: // space
case 0b11: // 1x1 block
// Both codes consume exactly one cell and can never fail.
flags |= 0b1;
continue;
case 0b01: // 1x2 block
// Needs cell `addr + 1` to be free as well.
// NOTE(review): `addr % 4 == 3` presumably means "last column of a 4-wide board" — confirm.
if (flags & 0b10 || addr % 4 == 3) { // invalid case
return offset; // broken offset
}
flags |= 0b11;
continue;
case 0b10: // 2x1 block
// Needs cell `addr + 4` to be free as well.
// NOTE(review): `addr > 15` presumably means "already in the last row" — confirm.
if (flags & 0b10000 || addr > 15) { // invalid case
return offset; // broken offset
}
flags |= 0b10001;
}
}
return 0; // pass check
}
/// Build all valid ranges of the specified head.
static void build_cases(const int head, Ranges &release) {
static void build_cases(const std::vector<uint32_t> &ranges,
const std::vector<uint32_t> &reversed, Ranges &release, const int head) {
release.clear();
release.reserve(ALL_CASES_NUM[head]);
// klotski::cases::global_derive(BasicRanges::instance().fetch(), release, head);
klotski::cases::global_derive_pro(BasicRanges::instance().fetch(), klotski::cases::get_reversed(), release, head);
// BasicRanges::instance().fetch().derive(head, release);
}
void AllCases::build() {
// TODO: lock here
// klotski::cases::get_reversed();
std::vector<uint32_t> reversed {BasicRanges::instance().fetch()};
for (auto &x : reversed) {
x = range_reverse(x);
}
// std::vector<uint32_t> reversed;
// std::ranges::transform(BasicRanges::instance().fetch(), std::back_inserter(reversed), [](uint32_t x) { return range_reverse(x); });
// auto &reversed = get_reversed();
for (auto head : heads()) {
// build_cases(head, get_cases()[head]);
auto &release = get_cases()[head];
release.clear();
release.reserve(ALL_CASES_NUM[head]);
for (uint32_t index = 0; index < reversed.size(); ++index) {
if (const auto offset = check_range(head, reversed[index])) { // invalid case
if (offset > 14) {
continue;
}
// klotski::cases::global_derive(BasicRanges::instance().fetch(), release, head);
// klotski::cases::global_derive_pro(BasicRanges::instance().fetch(), reversed, release, head);
klotski::cases::global_derive_pro(reversed, BasicRanges::instance().fetch(), release, head);
// !! <- broken
// ( xx xx xx ) xx xx xx ... [range]
// +1 00 00 00 ... (delta)
const int tmp = (16 - offset) * 2;
uint32_t min_next = ((ranges[index] >> tmp) + 1) << tmp; // next possible range
if (offset > 5) { // located next range by min_next
while (ranges[++index] < min_next) {}
} else {
index = std::lower_bound(ranges.begin() + index, ranges.end(), min_next) - ranges.begin();
}
--index;
continue;
}
release.emplace_back(range_reverse(reversed[index])); // release valid case
}
available_ = true;
// build_parallel([](auto &&func) {
// func();
// });
}
void AllCases::build_parallel(Executor &&executor) {
void AllCases::build() {
if (available_) {
return; // reduce consumption of mutex
}
std::lock_guard<std::mutex> guard(building_);
std::lock_guard guard {building_};
if (available_) {
return; // data is already available
}
std::vector<std::future<void>> futures;
for (auto head : heads()) {
auto promise = std::make_shared<std::promise<void>>();
futures.emplace_back(promise->get_future());
executor([head, promise = std::move(promise)]() {
build_cases(head, get_cases()[head]);
promise->set_value(); // subtask completed notification
});
const auto &ranges = BasicRanges::instance().fetch();
std::vector reversed {ranges};
for (auto &x : reversed) {
x = range_reverse(x);
}
for (auto &x : futures) {
x.get(); // wait until all subtasks completed
for (const auto head : heads()) {
build_cases(ranges, reversed, get_cases()[head], head);
}
available_ = true;
}
@ -93,17 +105,24 @@ void AllCases::build_parallel_async(Executor &&executor, Notifier &&callback) {
callback();
return; // reduce consumption of mutex
}
building_.lock();
if (available_) {
building_.unlock();
callback();
return; // data is already available
}
auto counter = std::make_shared<std::atomic<int>>(0);
auto all_done = std::make_shared<Notifier>(std::move(callback));
for (auto head : heads()) {
executor([this, head, counter, all_done]() {
build_cases(head, get_cases()[head]);
const auto counter = std::make_shared<std::atomic<int>>(0);
const auto all_done = std::make_shared<Notifier>(std::move(callback));
const auto reversed = std::make_shared<std::vector<uint32_t>>(BasicRanges::instance().fetch());
for (auto &x : *reversed) {
x = range_reverse(x);
}
for (const auto head : heads()) {
executor([=, this] {
build_cases(BasicRanges::instance().fetch(), *reversed, get_cases()[head], head);
if (counter->fetch_add(1) == heads().size() - 1) { // all tasks done
available_ = true;
building_.unlock(); // release building mutex

113
src/core/all_cases/internal/derive.cc

@ -1,113 +0,0 @@
#include "utils/utility.h"
#include "ranges/ranges.h"
#include "all_cases/all_cases.h"
using klotski::cases::Ranges;
/// Check whether the combination of head and range is valid.
///
/// Walks `range` two bits at a time, lowest bits first, and tries to place the
/// block encoded by each 2-bit group into the first cell that is still free.
///
/// @param head  Shift applied to the 2x2 block mask `0b110011`.
/// @param range Packed sequence of 2-bit block codes. Scanning stops as soon as
///              the remaining value is zero, so trailing `00` groups are never
///              visited.
/// @return 0 when every visited group could be placed, otherwise the 1-based
///         index of the first 2-bit group that could not be placed.
static int check_range(const int head, uint32_t range) {
uint32_t flags = 0b110011 << head; // fill 2x2 block
// `flags` is an occupancy mask relative to `addr`: after the shift below,
// bit 0 stands for cell `addr`, bit 1 for cell `addr + 1`, bit 4 for `addr + 4`.
for (int addr = 0, offset = 1; range; range >>= 2, ++offset) { // traverse every 2-bit
// Skip the run of already-occupied cells so that bit 0 is the next free cell.
const auto num = std::countr_one(flags);
addr += num; // next unfilled block
flags >>= num;
switch (range & 0b11) {
case 0b00: // space
case 0b11: // 1x1 block
// Both codes consume exactly one cell and can never fail.
flags |= 0b1;
continue;
case 0b01: // 1x2 block
// Needs cell `addr + 1` to be free as well.
// NOTE(review): `addr % 4 == 3` presumably means "last column of a 4-wide board" — confirm.
if (flags & 0b10 || addr % 4 == 3) { // invalid case
return offset; // broken offset
}
flags |= 0b11;
continue;
case 0b10: // 2x1 block
// Needs cell `addr + 4` to be free as well.
// NOTE(review): `addr > 15` presumably means "already in the last row" — confirm.
if (flags & 0b10000 || addr > 15) { // invalid case
return offset; // broken offset
}
flags |= 0b10001;
}
}
return 0; // pass check
}
/// Append every range that is valid for `head` to `output`.
///
/// Each entry of `range` is passed to `check_range` as-is. Accepted entries are
/// stored as `range_reverse(range[i])`. When an entry is rejected at 2-bit
/// group `offset`, all following entries whose reversed value is below
/// `min_next` are skipped without being checked.
///
/// @param range  Candidate ranges. The skip logic only makes sense if
///               `range_reverse(range[i])` is ascending in `i`
///               (NOTE(review): not enforced here — confirm against the caller).
/// @param output Destination list; appended to, never cleared.
/// @param head   Head position forwarded to `check_range`.
void klotski::cases::global_derive(const std::vector<uint32_t> &range, std::vector<uint32_t> &output, int head) {
    const auto total = static_cast<uint32_t>(range.size());
    for (uint32_t index = 0; index < total; ++index) {
        const auto offset = check_range(head, range[index]);
        if (offset == 0) {
            output.emplace_back(range_reverse(range[index])); // release valid case
            continue;
        }

        //         !! <- broken
        // ( xx xx xx ) xx xx xx ... [reversed range]
        //         +1   00 00 00 ... (delta)
        const int left_offset = (16 - offset) * 2;
        const uint32_t min_next = ((range_reverse(range[index]) >> left_offset) + 1) << left_offset;

        // Locate the next entry that can possibly be valid. The scan is bounded
        // so it stops at the end of the list instead of reading past it when no
        // remaining entry reaches `min_next`.
        // TODO: maybe using binary search here
        uint32_t next = index + 1;
        while (next < total && range_reverse(range[next]) < min_next) {
            ++next;
        }
        index = next - 1; // the loop increment then lands on `next`
    }
}
/// Append every range that is valid for `head` to `output`, using a
/// precomputed reversed list to skip over runs of hopeless candidates.
///
/// Each entry of `range` is passed to `check_range` as-is. Accepted entries are
/// stored as `range_reverse(range[i])`. When an entry is rejected at 2-bit
/// group `offset`, the following entries whose `reversed` value is below
/// `min_next` are skipped without being checked.
///
/// @param range    Candidate ranges.
/// @param reversed Companion list indexed in parallel with `range`; must hold at
///                 least `range.size()` entries. The binary search requires it
///                 to be sorted ascending.
///                 NOTE(review): presumably `reversed[i] == range_reverse(range[i])`
///                 — confirm against the caller.
/// @param output   Destination list; appended to, never cleared.
/// @param head     Head position forwarded to `check_range`.
void klotski::cases::global_derive_pro(const std::vector<uint32_t> &range, const std::vector<uint32_t> &reversed, std::vector<uint32_t> &output, int head) {
    const auto total = static_cast<uint32_t>(range.size());
    const auto limit = static_cast<uint32_t>(reversed.size());
    for (uint32_t index = 0; index < total; ++index) {
        const auto offset = check_range(head, range[index]);
        if (offset == 0) {
            output.emplace_back(range_reverse(range[index])); // release valid case
            continue;
        }
        if (offset > 14) {
            // NOTE(review): presumably a failure this deep leaves nothing worth
            // skipping, so only the current entry is dropped — confirm.
            continue;
        }

        //         !! <- broken
        // ( xx xx xx ) xx xx xx ... [reversed range]
        //         +1   00 00 00 ... (delta)
        const int left_offset = (16 - offset) * 2;
        const uint32_t min_next = ((reversed[index] >> left_offset) + 1) << left_offset;

        // Locate the next entry that can possibly be valid. Both strategies
        // start at `index + 1`, so the outer loop always moves forward.
        uint32_t next = index + 1;
        if (offset > 5) {
            // Deep failure: linear scan, bounded so it cannot run off the end
            // of `reversed`.
            while (next < limit && reversed[next] < min_next) {
                ++next;
            }
        } else {
            // Shallow failure: binary search over the remaining entries.
            const auto first = reversed.begin() + next;
            next = static_cast<uint32_t>(std::lower_bound(first, reversed.end(), min_next) - reversed.begin());
        }
        index = next - 1; // the loop increment then lands on `next`
    }
}

16
src/core/benchmark/group.cc

@ -7,6 +7,7 @@
#include <ranges/ranges.h>
#include "../../../third_party/thread-pool/include/BS_thread_pool.hpp"
#include "all_cases/all_cases.h"
#undef private
@ -175,12 +176,25 @@ static void OriginBasicRanges(benchmark::State &state) {
static void OriginAllCases(benchmark::State &state) {
klotski::cases::BasicRanges::instance().build();
klotski::cases::get_reversed();
// klotski::cases::get_reversed();
// BS::thread_pool pool {4};
for (auto _ : state) {
auto &pp = klotski::cases::AllCases::instance();
pp.available_ = false;
pp.build();
// pp.build_parallel_async([](auto func) {func();}, []() {
// // std::cout << "hello" << std::endl;
// });
// pp.build_parallel_async([&pool](auto func) {
// pool.submit_task(func);
// }, [] {});
//
// pool.wait();
}
}

36
src/core/main.cc

@ -12,6 +12,8 @@
#include "short_code/short_code.h"
#include "common_code/common_code.h"
#include "../../third_party/thread-pool/include/BS_thread_pool.hpp"
using klotski::core::Core;
using klotski::cases::AllCases;
@ -25,35 +27,21 @@ using klotski::cases::GroupUnion;
using klotski::codec::SHORT_CODE_LIMIT;
int main() {
const auto start = clock();
auto &basic_ranges = klotski::cases::BasicRanges::instance().fetch();
klotski::cases::Ranges flip {basic_ranges};
for (auto &x : flip) {
x = klotski::range_reverse(x);
}
// const auto start = clock();
klotski::cases::Ranges results;
results.reserve(klotski::cases::ALL_CASES_NUM_);
const auto start = std::chrono::system_clock::now();
klotski::cases::derive_demo(basic_ranges, flip, results, 0);
klotski::cases::derive_demo(basic_ranges, flip, results, 1);
klotski::cases::derive_demo(basic_ranges, flip, results, 2);
BS::thread_pool pool {};
klotski::cases::derive_demo(basic_ranges, flip, results, 4);
klotski::cases::derive_demo(basic_ranges, flip, results, 5);
klotski::cases::derive_demo(basic_ranges, flip, results, 6);
klotski::cases::BasicRanges::instance().build();
klotski::cases::derive_demo(basic_ranges, flip, results, 8);
klotski::cases::derive_demo(basic_ranges, flip, results, 9);
klotski::cases::derive_demo(basic_ranges, flip, results, 10);
klotski::cases::AllCases::instance().build_parallel_async([&pool](auto func) {
pool.submit_task(func);
}, [] {});
klotski::cases::derive_demo(basic_ranges, flip, results, 12);
klotski::cases::derive_demo(basic_ranges, flip, results, 13);
klotski::cases::derive_demo(basic_ranges, flip, results, 14);
pool.wait();
// std::cout << results.size() << " vs " << klotski::cases::ALL_CASES_NUM_ << std::endl;
std::cerr << std::chrono::system_clock::now() - start << std::endl;
// auto raw_code = RawCode::from_common_code(0x1A9BF0C00)->unwrap();
// auto ret = klotski::cases::group_extend_from_seed(raw_code);
@ -92,7 +80,7 @@ int main() {
// std::cout << "----" << std::endl;
// }
std::cerr << ((clock() - start) * 1000 / CLOCKS_PER_SEC) << "ms" << std::endl;
// std::cerr << ((clock() - start) * 1000 / CLOCKS_PER_SEC) << "ms" << std::endl;
return 0;
}

14
src/core_ffi/c_ffi/all_cases.cc

@ -40,13 +40,13 @@ void all_cases_build_async(const executor_t executor, const notifier_t callback)
}
void all_cases_build_parallel(executor_t executor) {
AllCases::instance().build_parallel([executor](Runner &&runner) {
const auto func = [](void *arg) {
(*static_cast<Runner*>(arg))();
delete static_cast<Runner*>(arg);
};
executor(func, new Runner {std::move(runner)});
});
// AllCases::instance().build_parallel([executor](Runner &&runner) {
// const auto func = [](void *arg) {
// (*static_cast<Runner*>(arg))();
// delete static_cast<Runner*>(arg);
// };
// executor(func, new Runner {std::move(runner)});
// });
}
void all_cases_build_parallel_async(executor_t executor, notifier_t callback) {

26
src/core_test/cases/all_cases.cc

@ -89,23 +89,23 @@ TEST_FF(AllCases, all_cases_race) {
}
TEST_FF(AllCases, all_cases_parallel) {
AllCases::instance().build_parallel(executor_.Entry());
EXPECT_TRUE(Available());
Verify();
// AllCases::instance().build_parallel(executor_.Entry());
// EXPECT_TRUE(Available());
// Verify();
AllCases::instance().build_parallel(executor_.Entry());
EXPECT_TRUE(Available());
Verify();
// AllCases::instance().build_parallel(executor_.Entry());
// EXPECT_TRUE(Available());
// Verify();
}
TEST_FF(AllCases, all_cases_parallel_race) {
racer_.Begin([this] {
AllCases::instance().build_parallel(executor_.Entry());
});
EXPECT_FALSE(Available());
racer_.Join();
EXPECT_TRUE(Available());
Verify();
// racer_.Begin([this] {
// AllCases::instance().build_parallel(executor_.Entry());
// });
// EXPECT_FALSE(Available());
// racer_.Join();
// EXPECT_TRUE(Available());
// Verify();
}
TEST_FF(AllCases, all_cases_async) {

Loading…
Cancel
Save