Browse Source

perf: faster BasicRange implementation

master
Dnomd343 1 month ago
parent
commit
5368a48a8d
  1. 46
      src/core/benchmark/group.cc
  2. 4
      src/core/group/group.h
  3. 148
      src/core/group/internal/group.cc
  4. 17
      src/core/main.cc
  5. 26
      src/core_test/group_tmp/group_extend.cc

46
src/core/benchmark/group.cc

@ -1,9 +1,11 @@
#include <iostream>
#include <benchmark/benchmark.h>
#include <group/group.h>
#define private public
#include "group/group.h"
#include "all_cases/all_cases.h"
#undef private
using klotski::cases::AllCases;
@ -128,14 +130,14 @@ static void GroupExtend(benchmark::State &state) {
//
// }
static std::vector<std::tuple<int, int, int, int>> target_nums() {
static std::vector<std::tuple<int, int, int>> target_nums() {
std::vector<std::tuple<int, int, int, int>> results;
std::vector<std::tuple<int, int, int>> results;
for (int n = 0; n <= 7; ++n) {
for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) {
for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) {
results.emplace_back(16 - n*2 - n_1x1, n - n_2x1, n_2x1, n_1x1);
results.emplace_back(n, n_2x1, n_1x1);
}
}
}
@ -148,27 +150,29 @@ static void SpawnRanges(benchmark::State &state) {
auto nums = target_nums();
// std::cout << nums.size() << std::endl;
//
// for (auto [n1, n2, n3, n4] : nums) {
// if (n1 == 2 && n2 == 1 && n3 == 4 && n4 == 4) {
//
// std::cout << "ok" << std::endl;
//
// }
// }
for (auto _ : state) {
for (auto [n, n_2x1, n_1x1] : nums) {
// klotski::cases::spawn_ranges(n, n_2x1, n_1x1);
}
}
}
// klotski::cases::spawn_ranges(2, 1, 4, 4);
/// Benchmark: time one complete `klotski::cases::basic_ranges()` call per
/// iteration, including the destruction of the returned vector.
static void BasicRanges(benchmark::State &state) {
    for (auto _ : state) {
        auto ranges = klotski::cases::basic_ranges();
        benchmark::DoNotOptimize(ranges); // keep the result observable so the call cannot be elided
    }
}
/// Benchmark: on every iteration, fetch the object returned by
/// `klotski::cases::BasicRanges::instance()` and call `build_ranges` on the
/// result of its own `get_ranges()`.
// NOTE(review): `build_ranges` is presumably reachable only because this file
// includes the headers under `#define private public` -- confirm.
static void OriginBasicRanges(benchmark::State &state) {
    for (auto _ : state) {
        auto &instance = klotski::cases::BasicRanges::instance();
        instance.build_ranges(instance.get_ranges());
    }
}
// BENCHMARK(CommonCodeToTypeId)->Arg(8)->Arg(64)->Arg(256);
// BENCHMARK(RawCodeToTypeId)->Arg(8)->Arg(64)->Arg(256);
@ -176,6 +180,10 @@ static void SpawnRanges(benchmark::State &state) {
// BENCHMARK(FilterFromAllCases)->Unit(benchmark::kMillisecond);
BENCHMARK(SpawnRanges)->Unit(benchmark::kMillisecond);
// BENCHMARK(SpawnRanges)->Unit(benchmark::kMillisecond);
BENCHMARK(BasicRanges)->Unit(benchmark::kMillisecond);
// BENCHMARK(OriginBasicRanges)->Unit(benchmark::kMillisecond);
BENCHMARK_MAIN();

4
src/core/group/group.h

@ -80,7 +80,9 @@ uint32_t raw_code_to_type_id(uint64_t raw_code);
std::vector<uint64_t> group_extend_from_seed(uint64_t raw_code);
std::vector<uint32_t> spawn_ranges(int n_00, int n_01, int n_10, int n_11);
void spawn_ranges(std::vector<uint32_t> &ranges, int n, int n_2x1, int n_1x1);
std::vector<uint32_t> basic_ranges();
class Group;

148
src/core/group/internal/group.cc

@ -63,7 +63,7 @@ std::vector<uint64_t> klotski::cases::group_extend_from_seed(uint64_t raw_code)
}
template<int N>
static std::vector<uint32_t> demo(int n_10, int n_11) {
static void demo(std::vector<uint32_t> &ranges, int n_10, int n_11) {
constexpr auto num = 16 - N;
constexpr auto offset = (16 - num) << 1; // offset of low bits
@ -73,23 +73,11 @@ static std::vector<uint32_t> demo(int n_10, int n_11) {
std::array<int, num> series {};
// for (int k = 0; k < n_00; ++k) {
// series[k] = 0b00;
// }
// for (int k = n_00; k < n_00 + n_01; ++k) {
// series[k] = 0b01;
// }
auto kk = std::fill_n(series.begin() + n_00, n_01, 0b01);
auto pp = std::fill_n(kk, n_10, 0b10);
std::fill_n(pp, n_11, 0b11);
// for (auto x : series) {
// std::cout << x << " ";
// }
// std::cout << std::endl;
std::vector<uint32_t> ranges;
// std::vector<uint32_t> ranges;
do {
uint32_t range = 0;
@ -98,59 +86,109 @@ static std::vector<uint32_t> demo(int n_10, int n_11) {
ranges.emplace_back(range << offset);
} while (std::ranges::next_permutation(series).found);
return ranges;
// return ranges;
}
std::vector<uint32_t> klotski::cases::spawn_ranges(int n_00, int n_01, int n_10, int n_11) {
void klotski::cases::spawn_ranges(std::vector<uint32_t> &ranges, int n, int n_2x1, int n_1x1) {
auto n = n_01 + n_10;
// auto n = n_01 + n_2x1;
// std::vector<uint32_t> ranges;
switch (n) {
case 0: return demo<0>(n_10, n_11);
case 1: return demo<1>(n_10, n_11);
case 2: return demo<2>(n_10, n_11);
case 3: return demo<3>(n_10, n_11);
case 4: return demo<4>(n_10, n_11);
case 5: return demo<5>(n_10, n_11);
case 6: return demo<6>(n_10, n_11);
case 7: return demo<7>(n_10, n_11);
default: return {};
case 0: return demo<0>(ranges, n_2x1, n_1x1);
case 1: return demo<1>(ranges, n_2x1, n_1x1);
case 2: return demo<2>(ranges, n_2x1, n_1x1);
case 3: return demo<3>(ranges, n_2x1, n_1x1);
case 4: return demo<4>(ranges, n_2x1, n_1x1);
case 5: return demo<5>(ranges, n_2x1, n_1x1);
case 6: return demo<6>(ranges, n_2x1, n_1x1);
case 7: return demo<7>(ranges, n_2x1, n_1x1);
default: return;
}
}
/// Enumerate, at compile time, every `(n, n_2x1, n_1x1)` triple with
/// `0 <= n <= 7`, `0 <= n_2x1 <= n` and `0 <= n_1x1 <= 14 - 2 * n`.
///
/// Triples are emitted in nested-loop order (n outermost, n_1x1 innermost),
/// which yields exactly 204 entries.
consteval std::array<std::tuple<int, int, int>, 204> target_nums() {
    std::array<std::tuple<int, int, int>, 204> table;
    int slot = 0; // next free position in `table`
    for (int n = 0; n < 8; ++n) {
        const int max_1x1 = 14 - n * 2;
        for (int n_2x1 = 0; n_2x1 < n + 1; ++n_2x1) {
            for (int n_1x1 = 0; n_1x1 < max_1x1 + 1; ++n_1x1) {
                table[slot] = {n, n_2x1, n_1x1};
                ++slot;
            }
        }
    }
    return table;
}
// return demo<5>(n_10, n_11);
using RangeIter = std::vector<uint32_t>::iterator;

/// Merge two adjacent runs `[begin, mid)` and `[mid, end)` in place.
///
/// Both runs are expected to be sorted ascending; afterwards `[begin, end)`
/// is one ascending run. Only the left run is copied to a scratch buffer: the
/// write cursor can never overtake the read cursor of the right run, so the
/// right run can be consumed directly from the original storage.
///
/// Ties take the element of the left run first, so the merge is stable.
///
/// @param begin start of the left run
/// @param mid   end of the left run / start of the right run
/// @param end   end of the right run
///
/// @note The scratch buffer is heap allocated; because the function is
///       `noexcept`, an allocation failure terminates the program.
static void combine_sort(RangeIter begin, RangeIter mid, RangeIter end) noexcept {
    if (begin == mid || mid == end) {
        return; // one run is empty: nothing to merge, and nothing safe to dereference
    }

    const std::vector<uint32_t> left(begin, mid); // left run backup
    auto src = left.begin();
    for (;;) {
        if (*src <= *mid) {
            *(begin++) = *(src++); // stored in original span
            if (src == left.end()) {
                return; // left run consumed, the rest of the right run is already in place
            }
            continue;
        }
        *(begin++) = *(mid++); // stored in original span
        if (mid == end) { // right run consumed
            std::copy(src, left.end(), begin); // flush what is left of the left run
            return;
        }
    }
}
// do { // full permutation traversal
// uint32_t range = 0;
// for (const auto x : series) // store every 2-bit
// (range <<= 2) |= x;
// ranges.emplace_back(range << offset);
// } while (std::next_permutation(series.begin(), series.end()));
/// Build the complete list of basic ranges.
///
/// Every `(n, n_2x1, n_1x1)` triple from `target_nums()` is expanded by
/// `spawn_ranges`, which appends one run of values to `results`. Adjacent
/// runs are then merged pairwise with `combine_sort`, pass after pass, until
/// a single run is left. Finally every value is passed through
/// `range_reverse`.
///
/// Run boundaries are tracked as offsets, not iterators: appending to a
/// vector invalidates its past-the-end iterator even when no reallocation
/// happens, and offsets also stay correct should the reserved capacity ever
/// turn out to be too small.
///
/// @return all ranges, merged into one run and then mapped by `range_reverse`
std::vector<uint32_t> klotski::cases::basic_ranges() {
    using Offset = std::vector<uint32_t>::difference_type;

    std::vector<uint32_t> results;
    results.reserve(7311921); // total number of values produced (the unit test expects this size)

    std::vector<Offset> bounds {0}; // [bounds[i], bounds[i + 1]) delimits the i-th run
    for (const auto &[n, n_2x1, n_1x1] : target_nums()) {
        spawn_ranges(results, n, n_2x1, n_1x1);
        bounds.emplace_back(static_cast<Offset>(results.size()));
    }

    while (bounds.size() > 2) { // merge until only one run remains
        std::vector<Offset> merged {bounds.front()};
        std::size_t i = 0;
        for (; i + 2 < bounds.size(); i += 2) { // merge runs i and i + 1
            combine_sort(results.begin() + bounds[i],
                         results.begin() + bounds[i + 1],
                         results.begin() + bounds[i + 2]);
            merged.emplace_back(bounds[i + 2]);
        }
        if (i + 1 < bounds.size()) {
            merged.emplace_back(bounds.back()); // odd run out, carried over to the next pass
        }
        bounds.swap(merged);
    }

    for (auto &x : results) {
        x = range_reverse(x);
    }
    return results;
}

17
src/core/main.cc

@ -26,7 +26,22 @@ using klotski::codec::SHORT_CODE_LIMIT;
int main() {
const auto start = clock();
klotski::cases::spawn_ranges(2, 1, 4, 4);
// klotski::cases::spawn_ranges(2, 1, 4, 4);
std::vector<uint32_t> r1 {1, 4, 5, 9, 0, 2, 3, 6, 7, 8};
auto begin = r1.begin();
auto mid = r1.begin() + 4;
auto end = r1.end();
std::vector<uint32_t> results;
results.resize(end - begin);
std::merge(begin, mid, mid, end, results.begin());
std::copy(results.begin(), results.end(), begin);
for (auto x : r1) {
std::cout << x << " ";
}
std::cout << std::endl;
// std::vector<int> series {1, 2, 3, 4};

26
src/core_test/group_tmp/group_extend.cc

@ -21,14 +21,14 @@ TEST(Group, group_extend) {
EXPECT_EQ(hash_ret, 0x91BD28A749312A6D);
}
static std::vector<std::tuple<int, int, int, int>> target_nums() {
static std::vector<std::tuple<int, int, int>> target_nums() {
std::vector<std::tuple<int, int, int, int>> results;
std::vector<std::tuple<int, int, int>> results;
for (int n = 0; n <= 7; ++n) {
for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) {
for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) {
results.emplace_back(16 - n*2 - n_1x1, n - n_2x1, n_2x1, n_1x1);
results.emplace_back(n, n_2x1, n_1x1);
}
}
}
@ -46,11 +46,11 @@ TEST(Group, ranges) {
// EXPECT_EQ(hash_ret, 0xF6F87606E4205EAF);
std::vector<uint32_t> ranges;
for (auto [n1, n2, n3, n4] : target_nums()) {
for (auto [n, n_2x1, n_1x1] : target_nums()) {
auto kk = klotski::cases::spawn_ranges(n1, n2, n3, n4);
// auto kk = klotski::cases::spawn_ranges(n, n_2x1, n_1x1);
ranges.insert(ranges.end(), kk.begin(), kk.end());
// ranges.insert(ranges.end(), kk.begin(), kk.end());
}
EXPECT_EQ(ranges.size(), 7311921);
@ -60,3 +60,17 @@ TEST(Group, ranges) {
EXPECT_EQ(hash_ret, 0xA1E247B01D5A9545);
}
// Checks the element count and the xxh3 digest of `basic_ranges()`.
TEST(Group, basic_ranges) {
    auto ranges = klotski::cases::basic_ranges();
    EXPECT_EQ(ranges.size(), 7311921);

    // Digests recorded for earlier stages of the pipeline, kept for debugging:
    //   0xA1E247B01D5A9545 -- not sorted
    //   0x00A926AB1121230D -- not reversed
    auto digest = hash::xxh3(ranges);
    EXPECT_EQ(digest, 0x82B040060044E336);
}

Loading…
Cancel
Save