Browse Source

perf: range checking algorithm

master
Dnomd343 6 months ago
parent
commit
f1550afbc3
  1. 48
      src/core/all_cases/internal/all_cases.cc
  2. 55
      src/core/benchmark/group.cc
  3. 6
      src/core/group/internal/group_union.cc
  4. 35
      src/core/main.cc
  5. 201
      src/core/ranges/internal/derive.cc
  6. 16
      src/core/ranges/ranges.h

48
src/core/all_cases/internal/all_cases.cc

@ -19,34 +19,6 @@ static consteval Heads get_heads() {
return heads;
}
/// Check whether the combination of head and range is valid.
///
/// The 2x2 block is placed at `head`, then `range` is consumed two bits at a
/// time from its lowest bits. Each 2-bit code is dropped onto the first cell
/// that is not yet covered.
///
/// Returns 0 when every code fits, otherwise the 1-based position of the
/// first code that cannot be placed.
static int check_range(const int head, uint32_t range) {
    uint32_t occupied = 0b110011 << head; // cells covered by the 2x2 block
    int cell = 0; // board index that bit 0 of `occupied` currently stands for

    for (int position = 1; range != 0; ++position, range >>= 2) { // one 2-bit code per pass
        const auto skipped = std::countr_one(occupied);
        cell += skipped; // advance to the first uncovered cell
        occupied >>= skipped;

        const uint32_t code = range & 0b11;
        if (code == 0b01) { // 1x2 block
            if ((occupied & 0b10) != 0 || cell % 4 == 3) {
                return position; // next cell already covered, or `cell % 4 == 3`
            }
            occupied |= 0b11;
        } else if (code == 0b10) { // 2x1 block
            if ((occupied & 0b10000) != 0 || cell > 15) {
                return position; // cell four positions ahead already covered, or `cell > 15`
            }
            occupied |= 0b10001;
        } else { // 0b00 (space) or 0b11 (1x1 block): both take a single cell
            occupied |= 0b1;
        }
    }
    return 0; // every code was placed
}
/// Build all valid ranges of the specified head.
static void build_cases(const std::vector<uint32_t> &ranges,
const std::vector<uint32_t> &reversed, Ranges &release, const int head) {
@ -54,23 +26,25 @@ static void build_cases(const std::vector<uint32_t> &ranges,
release.reserve(ALL_CASES_NUM[head]);
for (uint32_t index = 0; index < reversed.size(); ++index) {
if (const auto offset = check_range(head, reversed[index])) { // invalid case
CHECK_NEXT:
if (const auto offset = Ranges::check(head, reversed[index])) { // invalid case
if (offset > 14) {
continue;
++index; // never overflow
goto CHECK_NEXT;
}
// !! <- broken
// ( xx xx xx ) xx xx xx ... [range]
// +1 00 00 00 ... (delta)
const int tmp = (16 - offset) * 2;
uint32_t min_next = ((ranges[index] >> tmp) + 1) << tmp; // next possible range
const uint32_t min_next = ((ranges[index] >> tmp) + 1) << tmp; // next possible range
if (offset > 5) { // located next range by min_next
while (ranges[++index] < min_next) {}
} else {
index = std::lower_bound(ranges.begin() + index, ranges.end(), min_next) - ranges.begin();
// min_next is always less than ranges.back()
if (offset > 5) {
while (ranges[++index] < min_next) {} // located next range
goto CHECK_NEXT;
}
--index;
continue;
index = std::lower_bound(ranges.begin() + index, ranges.end(), min_next) - ranges.begin();
goto CHECK_NEXT;
}
release.emplace_back(range_reverse(reversed[index])); // release valid case
}

55
src/core/benchmark/group.cc

@ -217,58 +217,17 @@ static void RangesDerive(benchmark::State &state) {
auto &basic_ranges = klotski::cases::BasicRanges::instance().fetch();
klotski::cases::Ranges flip {basic_ranges};
for (auto &x : flip) {
x = klotski::range_reverse(x);
}
klotski::cases::BidiRanges bidi_ranges;
for (auto x : basic_ranges) {
bidi_ranges.emplace_back(klotski::cases::bidi_t {.r1 = x, .r2 = klotski::range_reverse(x)});
}
// klotski::cases::Ranges results;
// results.reserve(klotski::cases::ALL_CASES_NUM_);
klotski::cases::Ranges results;
// results.reserve(klotski::cases::ALL_CASES_NUM[5]);
results.reserve(klotski::cases::ALL_CASES_NUM_);
auto group_union = klotski::cases::GroupUnion::unsafe_create(169);
for (auto _ : state) {
results.clear();
// results.clear();
// results.reserve(klotski::cases::ALL_CASES_NUM[5]);
// basic_ranges.derive(5, results);
klotski::cases::derive_demo(basic_ranges, flip, results, 0);
klotski::cases::derive_demo(basic_ranges, flip, results, 1);
klotski::cases::derive_demo(basic_ranges, flip, results, 2);
klotski::cases::derive_demo(basic_ranges, flip, results, 4);
klotski::cases::derive_demo(basic_ranges, flip, results, 5);
klotski::cases::derive_demo(basic_ranges, flip, results, 6);
klotski::cases::derive_demo(basic_ranges, flip, results, 8);
klotski::cases::derive_demo(basic_ranges, flip, results, 9);
klotski::cases::derive_demo(basic_ranges, flip, results, 10);
klotski::cases::derive_demo(basic_ranges, flip, results, 12);
klotski::cases::derive_demo(basic_ranges, flip, results, 13);
klotski::cases::derive_demo(basic_ranges, flip, results, 14);
// klotski::cases::derive_demo_pro(bidi_ranges, results, 0);
// klotski::cases::derive_demo_pro(bidi_ranges, results, 1);
// klotski::cases::derive_demo_pro(bidi_ranges, results, 2);
//
// klotski::cases::derive_demo_pro(bidi_ranges, results, 4);
// klotski::cases::derive_demo_pro(bidi_ranges, results, 5);
// klotski::cases::derive_demo_pro(bidi_ranges, results, 6);
//
// klotski::cases::derive_demo_pro(bidi_ranges, results, 8);
// klotski::cases::derive_demo_pro(bidi_ranges, results, 9);
// klotski::cases::derive_demo_pro(bidi_ranges, results, 10);
//
// klotski::cases::derive_demo_pro(bidi_ranges, results, 12);
// klotski::cases::derive_demo_pro(bidi_ranges, results, 13);
// klotski::cases::derive_demo_pro(bidi_ranges, results, 14);
volatile auto tmp = group_union.cases();
}
@ -284,10 +243,10 @@ static void RangesDerive(benchmark::State &state) {
// BENCHMARK(SpawnRanges)->Unit(benchmark::kMillisecond);
BENCHMARK(OriginBasicRanges)->Unit(benchmark::kMillisecond);
// BENCHMARK(OriginBasicRanges)->Unit(benchmark::kMillisecond);
// BENCHMARK(OriginAllCases)->Unit(benchmark::kMillisecond);
// BENCHMARK(RangesDerive)->Unit(benchmark::kMillisecond);
BENCHMARK(RangesDerive)->Unit(benchmark::kMillisecond);
BENCHMARK_MAIN();

6
src/core/group/internal/group_union.cc

@ -93,11 +93,15 @@ klotski::cases::RangesUnion klotski::cases::GroupUnion::cases() const {
int n_1x1 = TYPE_ID_N_1x1_NUM[type_id_]; // TODO: cal from type_id
ranges.spawn(n, n_2x1, n_1x1);
// for (int i = 0; i < TYPE_ID_LIMIT; ++i) {
// ranges.spawn(TYPE_ID_N_NUM[i], TYPE_ID_N_2x1_NUM[i], TYPE_ID_N_1x1_NUM[i]);
// }
// std::stable_sort(ranges.begin(), ranges.end());
for (auto &x : ranges) {
x = klotski::range_reverse(x);
}
// std::cout << "start derive" << std::endl;
RangesUnion cases;
ranges.derive(0x0, cases[0x0]);
ranges.derive(0x1, cases[0x1]);

35
src/core/main.cc

@ -34,39 +34,10 @@ int main() {
const auto start = std::chrono::system_clock::now();
// klotski::cases::BasicRanges::instance().build();
AllCases::instance().build();
BS::thread_pool pool {};
// auto demo = [](auto &&self, int val) {
// std::cout << "val = " << val << std::endl;
// if (val == 0) {
// return;
// }
// self(self, val - 1);
// };
//
// demo(demo, 5);
// constexpr std::array<int, 5> kk {1, 2, 3, 4, 5};
// auto ret = klotski::to_offset(kk, 0);
// std::cout << std::format("{}", ret) << std::endl;
// klotski::cases::BasicRanges::instance().build();
klotski::cases::BasicRanges::instance().build_async([&pool](auto &&func) {
pool.submit_task(func);
}, [] {
// std::cout << "all done" << std::endl;
});
// klotski::cases::AllCases::instance().build_parallel_async([&pool](auto func) {
// pool.submit_task(func);
// }, [] {});
pool.wait();
// std::cout << BasicRanges::instance().fetch().size() << std::endl;
// auto ret = GroupUnion::unsafe_create(169).cases();
// std::cout << ret[4].size() << std::endl;
std::cerr << std::chrono::system_clock::now() - start << std::endl;

201
src/core/ranges/internal/derive.cc

@ -7,8 +7,8 @@
using klotski::cases::Ranges;
/// Check whether the combination of head and range is valid.
static int check_range(const int head, uint32_t range) {
int Ranges::check(const int head, uint32_t range) {
KLSK_ASSUME(head >= 0 && head < 16 && head % 4 != 3);
uint32_t flags = 0b110011 << head; // fill 2x2 block
for (int addr = 0, offset = 1; range; range >>= 2, ++offset) { // traverse every 2-bit
const auto num = std::countr_one(flags);
@ -35,169 +35,86 @@ static int check_range(const int head, uint32_t range) {
return 0; // pass check
}
// 0: [7, 8, 9, 10, 11, 12, 13, 14, 15]
// 1: [9, 10, 11, 12, 13, 14]
// 2: [8, 9, 10, 11, 12, 13]
// 3: [7, 8, 9, 10, 11, 12]
// 5: [8, 9, 10, 11, 12, 13]
// 10: [7, 8, 9, 10, 11, 12]
// 15: [8, 9, 10, 11, 12]
// 190: [5, 6, 7, 8]
// 284: [6, 7, 8]
// 327: [6, 7, 8]
// 591: [5, 6, 7, 8]
// 810: [5, 6, 7, 8]
// 895: [6, 7, 8]
// 1784: [4, 5, 6, 7]
// 2276: [5, 6, 7]
// 2447: [5, 6, 7]
// 5245: [4, 5, 6]
// 6346: [4, 5, 6]
// 6687: [5, 6]
// 15162: [3, 4, 5]
// 17588: [4, 5]
// 18271: [4, 5]
// 43243: [3, 4]
// 48548: [3]
// 48554: [3, 4]
// 49919: [4]
// 122103: [2]
// 122124: [2, 3]
// 133652: [3]
// 136382: [3]
// 342265: [2]
// 367139: [2]
void klotski::cases::derive_demo(const std::vector<uint32_t> &range, const std::vector<uint32_t> &reversed, std::vector<uint32_t> &output, int head) {
uint32_t reversed_max = reversed.back();
for (uint32_t index = 0; index < range.size(); ++index) {
if (const auto offset = check_range(head, range[index])) { // invalid case
if (offset > 14) {
continue;
}
// uint32_t index_bak = index;
// int check_range(const int head, uint32_t range) {
// KLSK_ASSUME(head >= 0 && head < 16 && head % 4 != 3);
// uint32_t flags = 0b110011 << head; // fill 2x2 block
// for (int addr = 0, offset = 1; range; range >>= 2, ++offset) { // traverse every 2-bit
// const auto num = std::countr_one(flags);
// addr += num; // next unfilled block
// flags >>= num;
// switch (range & 0b11) {
// case 0b00: // space
// case 0b11: // 1x1 block
// flags |= 0b1;
// continue;
// case 0b01: // 1x2 block
// if (flags & 0b10 || addr % 4 == 3) { // invalid case
// return offset; // broken offset
// }
// flags |= 0b11;
// continue;
// case 0b10: // 2x1 block
// if (flags & 0b10000 || addr > 15) { // invalid case
// return offset; // broken offset
// }
// flags |= 0b10001;
// }
// }
// return 0; // pass check
// }
int left_offset = (16 - offset) * 2;
uint32_t min_next = ((reversed[index] >> left_offset) + 1) << left_offset;
// min_next = std::min(min_next, reversed_max);
// std::cout << min_next << " vs " << reversed_max << std::endl;
if (offset > 5) {
while (reversed[++index] < min_next) { // located next range
// if (index > range.size()) {
// std::cout << "get it" << std::endl;
// }
}
--index;
} else {
auto begin = reversed.begin() + index;
auto kk = std::lower_bound(begin, reversed.end(), min_next) - begin;
index += kk - 1;
}
void Ranges::derive(const int head, Ranges &output) const {
// std::cout << index << " vs " << index_bak << std::endl;
// Ranges reversed {*this};
//
// for (auto &x : reversed) {
// x = klotski::range_reverse(x);
// }
// auto end_distance = range.size() - index;
// auto real_distance = index - index_bak;
// std::cout << "last = " << range_reverse(this->back()) << std::endl;
// std::cout << std::format("{} {} {}", head, real_distance, offset) << std::endl;
// auto &ranges = *this;
// std::cout << real_distance << " vs " << end_distance << std::endl;
// uint32_t min_next = 0;
uint32_t last_val = range_reverse(this->back());
// uint32_t tmp = 1U << (32 - offset * 2); // distance to next possible range
for (uint32_t index = 0; index < size(); ++index) {
if (const auto offset = check(head, (*this)[index])) { // invalid case
uint32_t tmp = 1U << (32 - offset * 2); // distance to next possible range
/// !! <- broken
/// ( xx xx xx ) xx xx xx ... [reversed range]
/// +1 00 00 00 ... (delta)
// tmp += range_reverse(range[index]) & ~(tmp - 1);
// TODO: overflow here in some type_id
// TODO: -> tmp > range[-1]
// TODO: maybe using binary search here
// while (range_reverse(range[++index]) < tmp) {} // located next range
// --index;
continue;
}
output.emplace_back(range_reverse(range[index])); // release valid case
}
}
void klotski::cases::derive_demo_pro(const klotski::cases::BidiRanges &bidi_range, std::vector<uint32_t> &output, int head) {
for (uint32_t index = 0; index < bidi_range.size(); ++index) {
if (const auto offset = check_range(head, bidi_range[index].r1)) { // invalid case
// if (offset > 14) {
// continue;
// }
// uint32_t index_bak = index;
tmp += range_reverse((*this)[index]) & ~(tmp - 1);
int left_offset = (16 - offset) * 2;
uint32_t min_next = ((bidi_range[index].r2 >> left_offset) + 1) << left_offset;
// min_next = std::min(min_next, reversed_max);
// std::cout << min_next << " vs " << reversed_max << std::endl;
// std::cout << "min next = " << tmp << std::endl;
// if (offset > 5) {
while (bidi_range[++index].r2 < min_next) { // located next range
// if (index > range.size()) {
// std::cout << "get it" << std::endl;
// }
}
--index;
// } else {
// auto begin = reversed.begin() + index;
// auto kk = std::lower_bound(begin, reversed.end(), min_next) - begin;
// index += kk - 1;
// if (min_next > tmp) {
// std::cout << "error" << std::endl;
// }
auto min_next = tmp;
// std::cout << index << " vs " << index_bak << std::endl;
// const int tmp = (16 - offset) * 2;
// const uint32_t min_next = ((range_reverse((*this)[index]) >> tmp) + 1) << tmp;
// auto end_distance = range.size() - index;
// auto real_distance = index - index_bak;
// std::cout << std::format("{} {} {}", head, real_distance, offset) << std::endl;
// std::cout << real_distance << " vs " << end_distance << std::endl;
// uint32_t tmp = 1U << (32 - offset * 2); // distance to next possible range
/// !! <- broken
/// ( xx xx xx ) xx xx xx ... [reversed range]
/// +1 00 00 00 ... (delta)
// tmp += range_reverse(range[index]) & ~(tmp - 1);
if (min_next > last_val) {
// std::cout << "get it" << std::endl;
break;
// return;
}
// TODO: overflow here in some type_id
// TODO: -> tmp > range[-1]
// TODO: maybe using binary search here
// while (range_reverse(range[++index]) < tmp) {} // located next range
// --index;
while (range_reverse((*this)[++index]) < min_next) {} // located next range
--index;
continue;
}
output.emplace_back(bidi_range[index].r2); // release valid case
output.emplace_back(range_reverse((*this)[index])); // release valid case
}
}
void Ranges::derive(const int head, Ranges &output) const {
for (uint32_t index = 0; index < size(); ++index) {
if (const auto offset = check_range(head, (*this)[index])) { // invalid case
uint32_t tmp = 1U << (32 - offset * 2); // distance to next possible range
/// !! <- broken
/// ( xx xx xx ) xx xx xx ... [reversed range]
/// +1 00 00 00 ... (delta)
tmp += range_reverse((*this)[index]) & ~(tmp - 1);
// std::cout << "min_next = " << min_next << " | last_val = " << last_val << std::endl;
// TODO: overflow here in some type_id
// TODO: -> tmp > range[-1]
// TODO: maybe using binary search here
// std::cout << (min_next <= last_val) << std::endl;
// while (range_reverse((*this)[++index]) < tmp) {} // located next range
// --index;
continue;
}
output.emplace_back(range_reverse((*this)[index])); // release valid case
}
}

16
src/core/ranges/ranges.h

@ -3,14 +3,9 @@
#include <vector>
#include <cstdint>
namespace klotski::cases {
struct bidi_t {
uint32_t r1;
uint32_t r2;
};
#include "utils/utility.h"
typedef std::vector<bidi_t> BidiRanges;
namespace klotski::cases {
class Ranges : public std::vector<uint32_t> {
public:
@ -19,11 +14,10 @@ public:
/// Derive the legal klotski-ranges with specified head.
void derive(int head, Ranges &output) const;
};
void derive_demo(const std::vector<uint32_t> &range, const std::vector<uint32_t> &reversed, std::vector<uint32_t> &output, int head);
void derive_demo_pro(const BidiRanges &bidi_range, std::vector<uint32_t> &output, int head);
/// Check whether the combination of head and range is valid.
static KLSK_INLINE int check(int head, uint32_t range);
};
typedef std::array<Ranges, 16> RangesUnion;

Loading…
Cancel
Save