Browse Source

perf: faster BasicRange implementation

legacy
Dnomd343 7 months ago
parent
commit
5368a48a8d
  1. 46
      src/core/benchmark/group.cc
  2. 4
      src/core/group/group.h
  3. 148
      src/core/group/internal/group.cc
  4. 17
      src/core/main.cc
  5. 26
      src/core_test/group_tmp/group_extend.cc

46
src/core/benchmark/group.cc

@ -1,9 +1,11 @@
#include <iostream> #include <iostream>
#include <benchmark/benchmark.h> #include <benchmark/benchmark.h>
#include <group/group.h>
#define private public
#include "group/group.h"
#include "all_cases/all_cases.h" #include "all_cases/all_cases.h"
#undef private
using klotski::cases::AllCases; using klotski::cases::AllCases;
@ -128,14 +130,14 @@ static void GroupExtend(benchmark::State &state) {
// //
// } // }
static std::vector<std::tuple<int, int, int, int>> target_nums() { static std::vector<std::tuple<int, int, int>> target_nums() {
std::vector<std::tuple<int, int, int, int>> results; std::vector<std::tuple<int, int, int>> results;
for (int n = 0; n <= 7; ++n) { for (int n = 0; n <= 7; ++n) {
for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) { for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) {
for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) { for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) {
results.emplace_back(16 - n*2 - n_1x1, n - n_2x1, n_2x1, n_1x1); results.emplace_back(n, n_2x1, n_1x1);
} }
} }
} }
@ -148,27 +150,29 @@ static void SpawnRanges(benchmark::State &state) {
auto nums = target_nums(); auto nums = target_nums();
// std::cout << nums.size() << std::endl;
//
// for (auto [n1, n2, n3, n4] : nums) {
// if (n1 == 2 && n2 == 1 && n3 == 4 && n4 == 4) {
//
// std::cout << "ok" << std::endl;
//
// }
// }
for (auto _ : state) { for (auto _ : state) {
for (auto [n, n_2x1, n_1x1] : nums) {
// klotski::cases::spawn_ranges(n, n_2x1, n_1x1);
}
}
}
// klotski::cases::spawn_ranges(2, 1, 4, 4); static void BasicRanges(benchmark::State &state) {
for (auto [n1, n2, n3, n4] : nums) { for (auto _ : state) {
klotski::cases::spawn_ranges(n1, n2, n3, n4); klotski::cases::basic_ranges();
}
} }
} }
static void OriginBasicRanges(benchmark::State &state) {
for (auto _ : state) {
auto &kk = klotski::cases::BasicRanges::instance();
kk.build_ranges(kk.get_ranges());
}
}
// BENCHMARK(CommonCodeToTypeId)->Arg(8)->Arg(64)->Arg(256); // BENCHMARK(CommonCodeToTypeId)->Arg(8)->Arg(64)->Arg(256);
// BENCHMARK(RawCodeToTypeId)->Arg(8)->Arg(64)->Arg(256); // BENCHMARK(RawCodeToTypeId)->Arg(8)->Arg(64)->Arg(256);
@ -176,6 +180,10 @@ static void SpawnRanges(benchmark::State &state) {
// BENCHMARK(FilterFromAllCases)->Unit(benchmark::kMillisecond); // BENCHMARK(FilterFromAllCases)->Unit(benchmark::kMillisecond);
BENCHMARK(SpawnRanges)->Unit(benchmark::kMillisecond); // BENCHMARK(SpawnRanges)->Unit(benchmark::kMillisecond);
BENCHMARK(BasicRanges)->Unit(benchmark::kMillisecond);
// BENCHMARK(OriginBasicRanges)->Unit(benchmark::kMillisecond);
BENCHMARK_MAIN(); BENCHMARK_MAIN();

4
src/core/group/group.h

@ -80,7 +80,9 @@ uint32_t raw_code_to_type_id(uint64_t raw_code);
std::vector<uint64_t> group_extend_from_seed(uint64_t raw_code); std::vector<uint64_t> group_extend_from_seed(uint64_t raw_code);
std::vector<uint32_t> spawn_ranges(int n_00, int n_01, int n_10, int n_11); void spawn_ranges(std::vector<uint32_t> &ranges, int n, int n_2x1, int n_1x1);
std::vector<uint32_t> basic_ranges();
class Group; class Group;

148
src/core/group/internal/group.cc

@ -63,7 +63,7 @@ std::vector<uint64_t> klotski::cases::group_extend_from_seed(uint64_t raw_code)
} }
template<int N> template<int N>
static std::vector<uint32_t> demo(int n_10, int n_11) { static void demo(std::vector<uint32_t> &ranges, int n_10, int n_11) {
constexpr auto num = 16 - N; constexpr auto num = 16 - N;
constexpr auto offset = (16 - num) << 1; // offset of low bits constexpr auto offset = (16 - num) << 1; // offset of low bits
@ -73,23 +73,11 @@ static std::vector<uint32_t> demo(int n_10, int n_11) {
std::array<int, num> series {}; std::array<int, num> series {};
// for (int k = 0; k < n_00; ++k) {
// series[k] = 0b00;
// }
// for (int k = n_00; k < n_00 + n_01; ++k) {
// series[k] = 0b01;
// }
auto kk = std::fill_n(series.begin() + n_00, n_01, 0b01); auto kk = std::fill_n(series.begin() + n_00, n_01, 0b01);
auto pp = std::fill_n(kk, n_10, 0b10); auto pp = std::fill_n(kk, n_10, 0b10);
std::fill_n(pp, n_11, 0b11); std::fill_n(pp, n_11, 0b11);
// for (auto x : series) { // std::vector<uint32_t> ranges;
// std::cout << x << " ";
// }
// std::cout << std::endl;
std::vector<uint32_t> ranges;
do { do {
uint32_t range = 0; uint32_t range = 0;
@ -98,59 +86,109 @@ static std::vector<uint32_t> demo(int n_10, int n_11) {
ranges.emplace_back(range << offset); ranges.emplace_back(range << offset);
} while (std::ranges::next_permutation(series).found); } while (std::ranges::next_permutation(series).found);
return ranges; // return ranges;
} }
std::vector<uint32_t> klotski::cases::spawn_ranges(int n_00, int n_01, int n_10, int n_11) { void klotski::cases::spawn_ranges(std::vector<uint32_t> &ranges, int n, int n_2x1, int n_1x1) {
// auto n = n_01 + n_2x1;
auto n = n_01 + n_10; // std::vector<uint32_t> ranges;
switch (n) { switch (n) {
case 0: return demo<0>(n_10, n_11); case 0: return demo<0>(ranges, n_2x1, n_1x1);
case 1: return demo<1>(n_10, n_11); case 1: return demo<1>(ranges, n_2x1, n_1x1);
case 2: return demo<2>(n_10, n_11); case 2: return demo<2>(ranges, n_2x1, n_1x1);
case 3: return demo<3>(n_10, n_11); case 3: return demo<3>(ranges, n_2x1, n_1x1);
case 4: return demo<4>(n_10, n_11); case 4: return demo<4>(ranges, n_2x1, n_1x1);
case 5: return demo<5>(n_10, n_11); case 5: return demo<5>(ranges, n_2x1, n_1x1);
case 6: return demo<6>(n_10, n_11); case 6: return demo<6>(ranges, n_2x1, n_1x1);
case 7: return demo<7>(n_10, n_11); case 7: return demo<7>(ranges, n_2x1, n_1x1);
default: return {}; default: return;
} }
// return demo<5>(n_10, n_11); }
// auto num = n_00 + n_01 + n_10 + n_11; consteval std::array<std::tuple<int, int, int>, 204> target_nums() {
// auto offset = (16 - num) << 1; // offset of low bits std::array<std::tuple<int, int, int>, 204> results;
for (int i = 0, n = 0; n <= 7; ++n) {
for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) {
for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) {
results[i++] = {n, n_2x1, n_1x1};
}
}
}
return results;
}
// std::vector<int> series; using RangeIter = std::vector<uint32_t>::iterator;
// series.reserve(num);
// series.insert(series.end(), n_00, 0b00);
// series.insert(series.end(), n_01, 0b01);
// series.insert(series.end(), n_10, 0b10);
// series.insert(series.end(), n_11, 0b11);
// std::array<int, 11> series { static void combine_sort(RangeIter begin, RangeIter mid, RangeIter end) noexcept {
// 0b00, 0b00,
// 0b01,
// 0b10, 0b10, 0b10, 0b10,
// 0b11, 0b11, 0b11, 0b11,
// };
// std::vector<uint32_t> ranges; // std::inplace_merge(begin, mid, end);
// return;
// do { // full permutation traversal // std::vector<uint32_t> results;
// uint32_t range = 0; // results.resize(end - begin);
// for (const auto x : series) // store every 2-bit // std::merge(begin, mid, mid, end, results.begin());
// (range <<= 2) |= x; // std::copy(results.begin(), results.end(), begin);
// ranges.emplace_back(range << offset); // return;
// } while (std::next_permutation(series.begin(), series.end()));
// do { std::vector<uint32_t> tmp = {begin, mid}; // left array backup
// uint32_t range = 0; auto p = tmp.begin();
// for (const auto x : series) // store every 2-bit for (;;) {
// (range <<= 2) |= x; if (*p <= *mid) {
// ranges.emplace_back(range << offset); *(begin++) = *(p++); // stored in original span
// } while (std::ranges::next_permutation(series).found); if (p == tmp.end()) // left array is consumed
return;
continue;
}
*(begin++) = *(mid++); // stored in original span
if (mid == end) { // right array is consumed
std::copy(p, tmp.end(), begin); // left array remaining
return;
}
}
}
// return ranges; std::vector<uint32_t> klotski::cases::basic_ranges() {
std::vector<uint32_t> results;
results.reserve(7311921);
std::list<std::vector<uint32_t>::iterator> flags {results.begin()}; // mark ordered interval
for (auto [n, n_2x1, n_1x1] : target_nums()) {
spawn_ranges(results, n, n_2x1, n_1x1);
flags.emplace_back(results.end());
// auto sub_ranges = spawn_ranges(results, n, n_2x1, n_1x1);
// results.insert(results.end(), sub_ranges.begin(), sub_ranges.end());
}
// std::ranges::sort(results.begin(), results.end());
// std::ranges::stable_sort(results.begin(), results.end());
do {
decltype(flags.begin()) begin = flags.begin(), mid, end;
while (++(mid = begin) != flags.end() && ++(end = mid) != flags.end()) {
combine_sort(*begin, *mid, *end); // merge two ordered interval
flags.erase(mid);
begin = end;
}
} while (flags.size() > 2); // merge until only one interval remains
for (auto &x : results) {
x = range_reverse(x);
}
return results;
// std::vector<uint32_t> kk;
// kk.reserve(7311921);
// std::ranges::transform(results.begin(), results.end(), kk.begin(), range_reverse);
// return kk;
} }

17
src/core/main.cc

@ -26,7 +26,22 @@ using klotski::codec::SHORT_CODE_LIMIT;
int main() { int main() {
const auto start = clock(); const auto start = clock();
klotski::cases::spawn_ranges(2, 1, 4, 4); // klotski::cases::spawn_ranges(2, 1, 4, 4);
std::vector<uint32_t> r1 {1, 4, 5, 9, 0, 2, 3, 6, 7, 8};
auto begin = r1.begin();
auto mid = r1.begin() + 4;
auto end = r1.end();
std::vector<uint32_t> results;
results.resize(end - begin);
std::merge(begin, mid, mid, end, results.begin());
std::copy(results.begin(), results.end(), begin);
for (auto x : r1) {
std::cout << x << " ";
}
std::cout << std::endl;
// std::vector<int> series {1, 2, 3, 4}; // std::vector<int> series {1, 2, 3, 4};

26
src/core_test/group_tmp/group_extend.cc

@ -21,14 +21,14 @@ TEST(Group, group_extend) {
EXPECT_EQ(hash_ret, 0x91BD28A749312A6D); EXPECT_EQ(hash_ret, 0x91BD28A749312A6D);
} }
static std::vector<std::tuple<int, int, int, int>> target_nums() { static std::vector<std::tuple<int, int, int>> target_nums() {
std::vector<std::tuple<int, int, int, int>> results; std::vector<std::tuple<int, int, int>> results;
for (int n = 0; n <= 7; ++n) { for (int n = 0; n <= 7; ++n) {
for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) { for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) {
for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) { for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) {
results.emplace_back(16 - n*2 - n_1x1, n - n_2x1, n_2x1, n_1x1); results.emplace_back(n, n_2x1, n_1x1);
} }
} }
} }
@ -46,11 +46,11 @@ TEST(Group, ranges) {
// EXPECT_EQ(hash_ret, 0xF6F87606E4205EAF); // EXPECT_EQ(hash_ret, 0xF6F87606E4205EAF);
std::vector<uint32_t> ranges; std::vector<uint32_t> ranges;
for (auto [n1, n2, n3, n4] : target_nums()) { for (auto [n, n_2x1, n_1x1] : target_nums()) {
auto kk = klotski::cases::spawn_ranges(n1, n2, n3, n4); // auto kk = klotski::cases::spawn_ranges(n, n_2x1, n_1x1);
ranges.insert(ranges.end(), kk.begin(), kk.end()); // ranges.insert(ranges.end(), kk.begin(), kk.end());
} }
EXPECT_EQ(ranges.size(), 7311921); EXPECT_EQ(ranges.size(), 7311921);
@ -60,3 +60,17 @@ TEST(Group, ranges) {
EXPECT_EQ(hash_ret, 0xA1E247B01D5A9545); EXPECT_EQ(hash_ret, 0xA1E247B01D5A9545);
} }
TEST(Group, basic_ranges) {
auto ret = klotski::cases::basic_ranges();
// std::cout << ret.size() << std::endl;
EXPECT_EQ(ret.size(), 7311921);
auto hash_ret = hash::xxh3(ret);
// std::cout << std::format("{:X}", hash_ret) << std::endl;
// EXPECT_EQ(hash_ret, 0xA1E247B01D5A9545); // not sorted
// EXPECT_EQ(hash_ret, 0x00A926AB1121230D); // not reversed
EXPECT_EQ(hash_ret, 0x82B040060044E336);
}

Loading…
Cancel
Save