From fbbecd266463fc2a255310c0114963f0a461e1bd Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sun, 15 Jan 2023 00:49:47 +0800 Subject: [PATCH] update: perf speed of BasicRanges module --- src/CMakeLists.txt | 4 +- src/all_cases/basic_ranges.cc | 177 ++++++++++++++++++++++------------ src/all_cases/basic_ranges.h | 10 +- src/main.cc | 5 +- 4 files changed, 130 insertions(+), 66 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ece3466..9e4f157 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -28,8 +28,8 @@ add_subdirectory(analyse) ################################ -#add_executable(klotski main.cc) -add_executable(klotski main.cc basic_ranges_demo.cc) +add_executable(klotski main.cc) +#add_executable(klotski main.cc basic_ranges_demo.cc) ################################ diff --git a/src/all_cases/basic_ranges.cc b/src/all_cases/basic_ranges.cc index eb8ad9f..525390d 100644 --- a/src/all_cases/basic_ranges.cc +++ b/src/all_cases/basic_ranges.cc @@ -1,3 +1,4 @@ +#include #include #include "common.h" #include "basic_ranges.h" @@ -45,84 +46,140 @@ void BasicRanges::build() { // ensure that basic ranges available } } +#include + +void BasicRanges::sort_data(std::vector &flags, std::vector &raw) { + struct heap_node { + uint32_t value; + int index; + int limit; + }; + + struct compare { + bool operator() (heap_node n1, heap_node n2) { + return n1.value > n2.value; + } + }; + std::priority_queue, compare> min_heap; + + for (auto i = 0; i < flags.size() - 1; ++i) { + min_heap.push({ + .value = raw[flags[i]], + .index = flags[i], + .limit = flags[i + 1] - 1, + }); + } + + while (!min_heap.empty()) { + auto current = min_heap.top(); + min_heap.pop(); + data.emplace_back(current.value); + if (current.index != current.limit) { + min_heap.push({ + .value = raw[current.index + 1], + .index = current.index + 1, + .limit = current.limit, + }); + } + } +} + void BasicRanges::build_data() { // build basic ranges BasicRanges::data.reserve(BASIC_RANGES_SIZE); // memory pre-allocated + + std::vector raw_data; + raw_data.reserve(BASIC_RANGES_SIZE); + + std::vector start_points; + for (int n = 0; n <= 7; ++n) { // number of 1x2 and 2x1 block -> 0 ~ 7 for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) { // number of 2x1 block -> 0 ~ n for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) { // number of 1x1 block -> 0 ~ (14 - 2n) - int n_1x2 = n - n_2x1; - int n_space = 16 - n * 2 - n_1x1; - generate(n_space, n_1x2, n_2x1, n_1x1); // generate target ranges - /// 0x0 -> 00 | 1x2 -> 01 | 2x1 -> 10 | 1x1 -> 11 + + start_points.emplace_back(raw_data.size()); + + generate(raw_data, generate_t { // generate target ranges + .n1 = 16 - n * 2 - n_1x1, /// space -> 00 + .n2 = n - n_2x1, /// 1x2 -> 01 + .n3 = n_2x1, /// 2x1 -> 10 + .n4 = n_1x1, /// 1x1 -> 11 + }); } } } - std::sort(BasicRanges::data.begin(), BasicRanges::data.end()); // sort basic ranges + + start_points.emplace_back(raw_data.size()); + +// std::sort(BasicRanges::data.begin(), BasicRanges::data.end()); // sort basic ranges + + BasicRanges::sort_data(start_points, raw_data); + + std::cout << "size: " << BasicRanges::data.size() << std::endl; + +// std::sort(raw_data.begin(), raw_data.end()); +// std::cout << raw_data.size() << std::endl; + // for (auto &range : BasicRanges::data) { // range = Common::range_reverse(range); // basic ranges reverse // } } -void BasicRanges::generate(int n1, int n2, int n3, int n4) { // generate specific basic ranges - int len, limit; - constexpr uint32_t MASK_01 = 0b01 << 30; - constexpr uint32_t MASK_10 = 0b10 << 30; - constexpr uint32_t MASK_11 = 0b11 << 30; - std::vector cache_1, cache_2; - - len = n1 + n2; - limit = 0b1 << len; - for (uint32_t bin = 0; bin < limit; ++bin) { - if (binary_count(bin) != n2) { // skip binary without `n2` non-zero bits - continue; - } - uint32_t range = 0; - for (int i = 0; i < len; ++i) { // generate range base on binary value - range >>= 2; - if ((bin >> i) & 0b1) { // non-zero bit - range |= MASK_01; - } +void BasicRanges::generate(std::vector &release, generate_t info) { // generate specific basic ranges + constexpr uint32_t MASK_01 = (uint32_t)0b01 << 30; + constexpr uint32_t MASK_10 = (uint32_t)0b10 << 30; + constexpr uint32_t MASK_11 = (uint32_t)0b11 << 30; + + /// n4 n3 n2 n1 + /// 00000000 00000000 00000000 00000000 (32-bits) + struct build_t { + uint32_t nx; + uint32_t prefix; + int offset; + }; + + std::queue cache; + cache.emplace(build_t { + .nx = static_cast(info.n1 | info.n2 << 8 | info.n3 << 16 | info.n4 << 24), + .prefix = 0x00000000, + .offset = 0, + }); + + while (!cache.empty()) { // queue without elements + auto current = cache.front(); + if (!current.nx) { // both n1, n2, n3, n4 -> 0 + release.emplace_back(current.prefix); // release prefix as basic range + cache.pop(); + continue; // skip search } - cache_1.emplace_back(range); // insert into first layer - } - len += n3; - limit <<= n3; - for (uint32_t bin = 0; bin < limit; ++bin) { - if (binary_count(bin) != n3) { // skip binary without `n3` non-zero bits - continue; + if (current.nx & 0xFF) { // n1 -> `00` + cache.emplace(build_t { + .nx = current.nx - 0x01, // --n1 + .prefix = current.prefix, + .offset = current.offset + 2, + }); } - for (uint32_t base : cache_1) { // traverse first layer - uint32_t range = 0; - for (int i = 0; i < len; ++i) { // generate range base on binary value - if ((bin >> i) & 0b1) { // non-zero bit - (range >>= 2) |= MASK_10; - continue; - } - (range >>= 2) |= base & MASK_11; - base <<= 2; - } - cache_2.emplace_back(range); // insert into second layer + if (current.nx & 0xFF00) { // n2 -> `01` + cache.emplace(build_t { + .nx = current.nx - 0x0100, // --n2 + .prefix = current.prefix | (MASK_01 >> current.offset), + .offset = current.offset + 2, + }); } - } - - len += n4; - limit <<= n4; - for (uint32_t bin = 0; bin < limit; ++bin) { - if (binary_count(bin) != n4) { // skip binary without `n4` non-zero bits - continue; + if (current.nx & 0xFF0000) { // n3 -> `10` + cache.emplace(build_t { + .nx = current.nx - 0x010000, // --n3 + .prefix = current.prefix | (MASK_10 >> current.offset), + .offset = current.offset + 2, + }); } - for (uint32_t base : cache_2) { // traverse second layer - uint32_t range = 0; - for (int i = 0; i < len; ++i) { // generate range base on binary value - if ((bin >> i) & 0b1) { // non-zero bit - (range >>= 2) |= MASK_11; - continue; - } - (range >>= 2) |= base & MASK_11; - base <<= 2; - } - BasicRanges::data.emplace_back(range); // insert into release data + if (current.nx & 0xFF000000) { // n4 -> `11` + cache.emplace(build_t { + .nx = current.nx - 0x01000000, // --n4 + .prefix = current.prefix | (MASK_11 >> current.offset), + .offset = current.offset + 2, + }); } + cache.pop(); // remove searched case } } diff --git a/src/all_cases/basic_ranges.h b/src/all_cases/basic_ranges.h index 9ed202b..7e90093 100644 --- a/src/all_cases/basic_ranges.h +++ b/src/all_cases/basic_ranges.h @@ -18,10 +18,18 @@ public: static const std::vector* fetch(); private: + struct generate_t { + int n1; // number of `00` + int n2; // number of `01` + int n3; // number of `10` + int n4; // number of `11` + }; + static bool available; static std::mutex building; static std::vector data; static void build_data(); - static void generate(int n1, int n2, int n3, int n4); + static void sort_data(std::vector &flags, std::vector &raw); + static void generate(std::vector &release, generate_t info); }; diff --git a/src/main.cc b/src/main.cc index 2abbe27..23ccc10 100644 --- a/src/main.cc +++ b/src/main.cc @@ -242,9 +242,8 @@ int main() { // } - load_ranges(); - -// BasicRanges::build(); +// load_ranges(); + BasicRanges::build(); std::cerr << (clock() - start_time) * 1000 / CLOCKS_PER_SEC << "ms" << std::endl; // std::cerr << (clock() - start_time) * 1000000 / CLOCKS_PER_SEC << "us" << std::endl;