From f1550afbc35dfb09732bdeb4b13712d1f531c0e3 Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sat, 1 Jun 2024 18:10:10 +0800 Subject: [PATCH] perf: range checking algorithm --- src/core/all_cases/internal/all_cases.cc | 48 ++---- src/core/benchmark/group.cc | 55 +------ src/core/group/internal/group_union.cc | 6 +- src/core/main.cc | 35 +--- src/core/ranges/internal/derive.cc | 201 +++++++---------------- src/core/ranges/ranges.h | 16 +- 6 files changed, 90 insertions(+), 271 deletions(-) diff --git a/src/core/all_cases/internal/all_cases.cc b/src/core/all_cases/internal/all_cases.cc index e7c8b0e..610aaf1 100644 --- a/src/core/all_cases/internal/all_cases.cc +++ b/src/core/all_cases/internal/all_cases.cc @@ -19,34 +19,6 @@ static consteval Heads get_heads() { return heads; } -/// Check whether the combination of head and range is valid. -static int check_range(const int head, uint32_t range) { - uint32_t flags = 0b110011 << head; // fill 2x2 block - for (int addr = 0, offset = 1; range; range >>= 2, ++offset) { // traverse every 2-bit - const auto num = std::countr_one(flags); - addr += num; // next unfilled block - flags >>= num; - switch (range & 0b11) { - case 0b00: // space - case 0b11: // 1x1 block - flags |= 0b1; - continue; - case 0b01: // 1x2 block - if (flags & 0b10 || addr % 4 == 3) { // invalid case - return offset; // broken offset - } - flags |= 0b11; - continue; - case 0b10: // 2x1 block - if (flags & 0b10000 || addr > 15) { // invalid case - return offset; // broken offset - } - flags |= 0b10001; - } - } - return 0; // pass check -} - /// Build all valid ranges of the specified head. static void build_cases(const std::vector &ranges, const std::vector &reversed, Ranges &release, const int head) { @@ -54,23 +26,25 @@ static void build_cases(const std::vector &ranges, release.reserve(ALL_CASES_NUM[head]); for (uint32_t index = 0; index < reversed.size(); ++index) { - if (const auto offset = check_range(head, reversed[index])) { // invalid case + CHECK_NEXT: + if (const auto offset = Ranges::check(head, reversed[index])) { // invalid case if (offset > 14) { - continue; + ++index; // never overflow + goto CHECK_NEXT; } // !! <- broken // ( xx xx xx ) xx xx xx ... [range] // +1 00 00 00 ... (delta) const int tmp = (16 - offset) * 2; - uint32_t min_next = ((ranges[index] >> tmp) + 1) << tmp; // next possible range + const uint32_t min_next = ((ranges[index] >> tmp) + 1) << tmp; // next possible range - if (offset > 5) { // located next range by min_next - while (ranges[++index] < min_next) {} - } else { - index = std::lower_bound(ranges.begin() + index, ranges.end(), min_next) - ranges.begin(); + // min_next always less than ranges.back() + if (offset > 5) { + while (ranges[++index] < min_next) {} // located next range + goto CHECK_NEXT; } - --index; - continue; + index = std::lower_bound(ranges.begin() + index, ranges.end(), min_next) - ranges.begin(); + goto CHECK_NEXT; } release.emplace_back(range_reverse(reversed[index])); // release valid case } diff --git a/src/core/benchmark/group.cc b/src/core/benchmark/group.cc index 1f22f16..d99e022 100644 --- a/src/core/benchmark/group.cc +++ b/src/core/benchmark/group.cc @@ -217,58 +217,17 @@ static void RangesDerive(benchmark::State &state) { auto &basic_ranges = klotski::cases::BasicRanges::instance().fetch(); - klotski::cases::Ranges flip {basic_ranges}; - for (auto &x : flip) { - x = klotski::range_reverse(x); - } - - klotski::cases::BidiRanges bidi_ranges; - for (auto x : basic_ranges) { - bidi_ranges.emplace_back(klotski::cases::bidi_t {.r1 = x, .r2 = klotski::range_reverse(x)}); - } + // klotski::cases::Ranges results; + // results.reserve(klotski::cases::ALL_CASES_NUM_); - klotski::cases::Ranges results; - // results.reserve(klotski::cases::ALL_CASES_NUM[5]); - results.reserve(klotski::cases::ALL_CASES_NUM_); + auto group_union = klotski::cases::GroupUnion::unsafe_create(169); for (auto _ : state) { - results.clear(); + // results.clear(); // results.reserve(klotski::cases::ALL_CASES_NUM[5]); - // basic_ranges.derive(5, results); - - klotski::cases::derive_demo(basic_ranges, flip, results, 0); - klotski::cases::derive_demo(basic_ranges, flip, results, 1); - klotski::cases::derive_demo(basic_ranges, flip, results, 2); - - klotski::cases::derive_demo(basic_ranges, flip, results, 4); - klotski::cases::derive_demo(basic_ranges, flip, results, 5); - klotski::cases::derive_demo(basic_ranges, flip, results, 6); - - klotski::cases::derive_demo(basic_ranges, flip, results, 8); - klotski::cases::derive_demo(basic_ranges, flip, results, 9); - klotski::cases::derive_demo(basic_ranges, flip, results, 10); - - klotski::cases::derive_demo(basic_ranges, flip, results, 12); - klotski::cases::derive_demo(basic_ranges, flip, results, 13); - klotski::cases::derive_demo(basic_ranges, flip, results, 14); - - // klotski::cases::derive_demo_pro(bidi_ranges, results, 0); - // klotski::cases::derive_demo_pro(bidi_ranges, results, 1); - // klotski::cases::derive_demo_pro(bidi_ranges, results, 2); - // - // klotski::cases::derive_demo_pro(bidi_ranges, results, 4); - // klotski::cases::derive_demo_pro(bidi_ranges, results, 5); - // klotski::cases::derive_demo_pro(bidi_ranges, results, 6); - // - // klotski::cases::derive_demo_pro(bidi_ranges, results, 8); - // klotski::cases::derive_demo_pro(bidi_ranges, results, 9); - // klotski::cases::derive_demo_pro(bidi_ranges, results, 10); - // - // klotski::cases::derive_demo_pro(bidi_ranges, results, 12); - // klotski::cases::derive_demo_pro(bidi_ranges, results, 13); - // klotski::cases::derive_demo_pro(bidi_ranges, results, 14); + volatile auto tmp = group_union.cases(); } @@ -284,10 +243,10 @@ static void RangesDerive(benchmark::State &state) { // BENCHMARK(SpawnRanges)->Unit(benchmark::kMillisecond); -BENCHMARK(OriginBasicRanges)->Unit(benchmark::kMillisecond); +// BENCHMARK(OriginBasicRanges)->Unit(benchmark::kMillisecond); // BENCHMARK(OriginAllCases)->Unit(benchmark::kMillisecond); -// BENCHMARK(RangesDerive)->Unit(benchmark::kMillisecond); +BENCHMARK(RangesDerive)->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/src/core/group/internal/group_union.cc b/src/core/group/internal/group_union.cc index 5cfa21a..175be38 100644 --- a/src/core/group/internal/group_union.cc +++ b/src/core/group/internal/group_union.cc @@ -93,11 +93,15 @@ klotski::cases::RangesUnion klotski::cases::GroupUnion::cases() const { int n_1x1 = TYPE_ID_N_1x1_NUM[type_id_]; // TODO: cal from type_id ranges.spawn(n, n_2x1, n_1x1); + // for (int i = 0; i < TYPE_ID_LIMIT; ++i) { + // ranges.spawn(TYPE_ID_N_NUM[i], TYPE_ID_N_2x1_NUM[i], TYPE_ID_N_1x1_NUM[i]); + // } + // std::stable_sort(ranges.begin(), ranges.end()); + for (auto &x : ranges) { x = klotski::range_reverse(x); } - // std::cout << "start derive" << std::endl; RangesUnion cases; ranges.derive(0x0, cases[0x0]); ranges.derive(0x1, cases[0x1]); diff --git a/src/core/main.cc b/src/core/main.cc index ce8c3f3..5450a6f 100644 --- a/src/core/main.cc +++ b/src/core/main.cc @@ -34,39 +34,10 @@ int main() { const auto start = std::chrono::system_clock::now(); - // klotski::cases::BasicRanges::instance().build(); + AllCases::instance().build(); - BS::thread_pool pool {}; - - // auto demo = [](auto &&self, int val) { - // std::cout << "val = " << val << std::endl; - // if (val == 0) { - // return; - // } - // self(self, val - 1); - // }; - // - // demo(demo, 5); - - // constexpr std::array kk {1, 2, 3, 4, 5}; - // auto ret = klotski::to_offset(kk, 0); - // std::cout << std::format("{}", ret) << std::endl; - - // klotski::cases::BasicRanges::instance().build(); - - klotski::cases::BasicRanges::instance().build_async([&pool](auto &&func) { - pool.submit_task(func); - }, [] { - // std::cout << "all done" << std::endl; - }); - - // klotski::cases::AllCases::instance().build_parallel_async([&pool](auto func) { - // pool.submit_task(func); - // }, [] {}); - - pool.wait(); - - // std::cout << BasicRanges::instance().fetch().size() << std::endl; + // auto ret = GroupUnion::unsafe_create(169).cases(); + // std::cout << ret[4].size() << std::endl; std::cerr << std::chrono::system_clock::now() - start << std::endl; diff --git a/src/core/ranges/internal/derive.cc b/src/core/ranges/internal/derive.cc index ce05cea..5777e73 100644 --- a/src/core/ranges/internal/derive.cc +++ b/src/core/ranges/internal/derive.cc @@ -7,8 +7,8 @@ using klotski::cases::Ranges; -/// Check whether the combination of head and range is valid. -static int check_range(const int head, uint32_t range) { +int Ranges::check(const int head, uint32_t range) { + KLSK_ASSUME(head >= 0 && head < 16 && head % 4 != 3); uint32_t flags = 0b110011 << head; // fill 2x2 block for (int addr = 0, offset = 1; range; range >>= 2, ++offset) { // traverse every 2-bit const auto num = std::countr_one(flags); @@ -35,169 +35,86 @@ static int check_range(const int head, uint32_t range) { return 0; // pass check } -// 0: [7, 8, 9, 10, 11, 12, 13, 14, 15] -// 1: [9, 10, 11, 12, 13, 14] -// 2: [8, 9, 10, 11, 12, 13] -// 3: [7, 8, 9, 10, 11, 12] -// 5: [8, 9, 10, 11, 12, 13] -// 10: [7, 8, 9, 10, 11, 12] -// 15: [8, 9, 10, 11, 12] - -// 190: [5, 6, 7, 8] -// 284: [6, 7, 8] -// 327: [6, 7, 8] -// 591: [5, 6, 7, 8] -// 810: [5, 6, 7, 8] -// 895: [6, 7, 8] -// 1784: [4, 5, 6, 7] -// 2276: [5, 6, 7] -// 2447: [5, 6, 7] -// 5245: [4, 5, 6] -// 6346: [4, 5, 6] -// 6687: [5, 6] -// 15162: [3, 4, 5] -// 17588: [4, 5] -// 18271: [4, 5] -// 43243: [3, 4] -// 48548: [3] -// 48554: [3, 4] -// 49919: [4] -// 122103: [2] -// 122124: [2, 3] -// 133652: [3] -// 136382: [3] -// 342265: [2] -// 367139: [2] - -void klotski::cases::derive_demo(const std::vector &range, const std::vector &reversed, std::vector &output, int head) { - - uint32_t reversed_max = reversed.back(); - for (uint32_t index = 0; index < range.size(); ++index) { - if (const auto offset = check_range(head, range[index])) { // invalid case - - if (offset > 14) { - continue; - } - - // uint32_t index_bak = index; +// int check_range(const int head, uint32_t range) { +// KLSK_ASSUME(head >= 0 && head < 16 && head % 4 != 3); +// uint32_t flags = 0b110011 << head; // fill 2x2 block +// for (int addr = 0, offset = 1; range; range >>= 2, ++offset) { // traverse every 2-bit +// const auto num = std::countr_one(flags); +// addr += num; // next unfilled block +// flags >>= num; +// switch (range & 0b11) { +// case 0b00: // space +// case 0b11: // 1x1 block +// flags |= 0b1; +// continue; +// case 0b01: // 1x2 block +// if (flags & 0b10 || addr % 4 == 3) { // invalid case +// return offset; // broken offset +// } +// flags |= 0b11; +// continue; +// case 0b10: // 2x1 block +// if (flags & 0b10000 || addr > 15) { // invalid case +// return offset; // broken offset +// } +// flags |= 0b10001; +// } +// } +// return 0; // pass check +// } - int left_offset = (16 - offset) * 2; - uint32_t min_next = ((reversed[index] >> left_offset) + 1) << left_offset; - // min_next = std::min(min_next, reversed_max); - // std::cout << min_next << " vs " << reversed_max << std::endl; - - if (offset > 5) { - while (reversed[++index] < min_next) { // located next range - // if (index > range.size()) { - // std::cout << "get it" << std::endl; - // } - } - --index; - } else { - auto begin = reversed.begin() + index; - auto kk = std::lower_bound(begin, reversed.end(), min_next) - begin; - index += kk - 1; - } +void Ranges::derive(const int head, Ranges &output) const { - // std::cout << index << " vs " << index_bak << std::endl; + // Ranges reversed {*this}; + // + // for (auto &x : reversed) { + // x = klotski::range_reverse(x); + // } - // auto end_distance = range.size() - index; - // auto real_distance = index - index_bak; + // std::cout << "last = " << range_reverse(this->back()) << std::endl; - // std::cout << std::format("{} {} {}", head, real_distance, offset) << std::endl; + // auto &ranges = *this; - // std::cout << real_distance << " vs " << end_distance << std::endl; + // uint32_t min_next = 0; + uint32_t last_val = range_reverse(this->back()); - // uint32_t tmp = 1U << (32 - offset * 2); // distance to next possible range + for (uint32_t index = 0; index < size(); ++index) { + if (const auto offset = check(head, (*this)[index])) { // invalid case + uint32_t tmp = 1U << (32 - offset * 2); // distance to next possible range /// !! <- broken /// ( xx xx xx ) xx xx xx ... [reversed range] /// +1 00 00 00 ... (delta) - // tmp += range_reverse(range[index]) & ~(tmp - 1); - - // TODO: overflow here in some type_id - // TODO: -> tmp > range[-1] - // TODO: maybe using binary search here - - // while (range_reverse(range[++index]) < tmp) {} // located next range - // --index; - continue; - } - output.emplace_back(range_reverse(range[index])); // release valid case - } -} - -void klotski::cases::derive_demo_pro(const klotski::cases::BidiRanges &bidi_range, std::vector &output, int head) { - for (uint32_t index = 0; index < bidi_range.size(); ++index) { - if (const auto offset = check_range(head, bidi_range[index].r1)) { // invalid case - - // if (offset > 14) { - // continue; - // } - - // uint32_t index_bak = index; + tmp += range_reverse((*this)[index]) & ~(tmp - 1); - int left_offset = (16 - offset) * 2; - uint32_t min_next = ((bidi_range[index].r2 >> left_offset) + 1) << left_offset; - // min_next = std::min(min_next, reversed_max); - // std::cout << min_next << " vs " << reversed_max << std::endl; + // std::cout << "min next = " << tmp << std::endl; - // if (offset > 5) { - while (bidi_range[++index].r2 < min_next) { // located next range - // if (index > range.size()) { - // std::cout << "get it" << std::endl; - // } - } - --index; - // } else { - // auto begin = reversed.begin() + index; - // auto kk = std::lower_bound(begin, reversed.end(), min_next) - begin; - // index += kk - 1; + // if (min_next > tmp) { + // std::cout << "error" << std::endl; // } + auto min_next = tmp; - // std::cout << index << " vs " << index_bak << std::endl; + // const int tmp = (16 - offset) * 2; + // const uint32_t min_next = ((range_reverse((*this)[index]) >> tmp) + 1) << tmp; - // auto end_distance = range.size() - index; - // auto real_distance = index - index_bak; - - // std::cout << std::format("{} {} {}", head, real_distance, offset) << std::endl; - - // std::cout << real_distance << " vs " << end_distance << std::endl; - - // uint32_t tmp = 1U << (32 - offset * 2); // distance to next possible range - /// !! <- broken - /// ( xx xx xx ) xx xx xx ... [reversed range] - /// +1 00 00 00 ... (delta) - // tmp += range_reverse(range[index]) & ~(tmp - 1); + if (min_next > last_val) { + // std::cout << "get it" << std::endl; + break; + // return; + } // TODO: overflow here in some type_id // TODO: -> tmp > range[-1] // TODO: maybe using binary search here - // while (range_reverse(range[++index]) < tmp) {} // located next range - // --index; + while (range_reverse((*this)[++index]) < min_next) {} // located next range + --index; continue; } - output.emplace_back(bidi_range[index].r2); // release valid case + output.emplace_back(range_reverse((*this)[index])); // release valid case } -} -void Ranges::derive(const int head, Ranges &output) const { - for (uint32_t index = 0; index < size(); ++index) { - if (const auto offset = check_range(head, (*this)[index])) { // invalid case - uint32_t tmp = 1U << (32 - offset * 2); // distance to next possible range - /// !! <- broken - /// ( xx xx xx ) xx xx xx ... [reversed range] - /// +1 00 00 00 ... (delta) - tmp += range_reverse((*this)[index]) & ~(tmp - 1); + // std::cout << "min_next = " << min_next << " | last_val = " << last_val << std::endl; - // TODO: overflow here in some type_id - // TODO: -> tmp > range[-1] - // TODO: maybe using binary search here + // std::cout << (min_next <= last_val) << std::endl; - // while (range_reverse((*this)[++index]) < tmp) {} // located next range - // --index; - continue; - } - output.emplace_back(range_reverse((*this)[index])); // release valid case - } } diff --git a/src/core/ranges/ranges.h b/src/core/ranges/ranges.h index 6b6cfe6..cffb7fb 100644 --- a/src/core/ranges/ranges.h +++ b/src/core/ranges/ranges.h @@ -3,14 +3,9 @@ #include #include -namespace klotski::cases { - -struct bidi_t { - uint32_t r1; - uint32_t r2; -}; +#include "utils/utility.h" -typedef std::vector BidiRanges; +namespace klotski::cases { class Ranges : public std::vector { public: @@ -19,11 +14,10 @@ public: /// Derive the legal klotski-ranges with specified head. void derive(int head, Ranges &output) const; -}; - -void derive_demo(const std::vector &range, const std::vector &reversed, std::vector &output, int head); -void derive_demo_pro(const BidiRanges &bidi_range, std::vector &output, int head); + /// Check whether the combination of head and range is valid. + static KLSK_INLINE int check(int head, uint32_t range); +}; typedef std::array RangesUnion;