Browse Source

perf: high performance basic ranges build

master
Dnomd343 10 months ago
parent
commit
d163791aa0
  1. 10
      src/CMakeLists.txt
  2. 9
      src/core/CMakeLists.txt
  3. 249
      src/core/all_cases/basic_ranges.cc
  4. 80
      src/core/all_cases/basic_ranges.h
  5. 43
      src/core/main.cc

10
src/CMakeLists.txt

@ -2,10 +2,12 @@ cmake_minimum_required(VERSION 3.0)
# NOTE(review): this listing comes from a diff whose +/- markers are not visible,
# so the active `klotski_core` lines and their commented-out copies below appear
# side by side — confirm which set is live in the real file before editing.
# Top-level project declaration; only the C language is enabled here.
project(klotski-cli LANGUAGES C)
add_subdirectory(klotski_core)
include_directories(klotski_core)
# Pull in the build script located in the `core` sub-directory.
add_subdirectory(core)
add_executable(cli main.c)
target_link_libraries(cli PRIVATE klotski absl::flat_hash_map)
# Disabled copies of the `klotski_core` / `cli` commands listed above.
#add_subdirectory(klotski_core)
#include_directories(klotski_core)
#add_executable(cli main.c)
#target_link_libraries(cli PRIVATE klotski absl::flat_hash_map)
# -labsl_hash -labsl_city -labsl_low_level_hash -labsl_raw_hash_set

9
src/core/CMakeLists.txt

@ -0,0 +1,9 @@
# Minimum CMake version accepted by this build script.
cmake_minimum_required(VERSION 3.0)
# Request the C++20 standard for the targets created below.
set(CMAKE_CXX_STANDARD 20)
# Declare the project; ${PROJECT_NAME} expands to `klotski-core` further down.
project(klotski-core VERSION 0.1.2 LANGUAGES CXX)
# Pass `-fno-exceptions` to the compiler for every target in this directory.
add_compile_options(-fno-exceptions)
# Build a single executable, named after the project, from the two listed sources.
add_executable(${PROJECT_NAME} main.cc all_cases/basic_ranges.cc)

249
src/core/all_cases/basic_ranges.cc

@ -0,0 +1,249 @@
#include "basic_ranges.h"

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <list>
#include <vector>
/// Reverse the order of the sixteen 2-bit groups packed into a 32-bit word.
///
/// The bits inside each 2-bit group keep their order; only the position of the
/// groups is mirrored (group 0 swaps with group 15, group 1 with group 14, ...).
///
/// @param bin  the packed value to mirror
/// @return     `bin` with its 2-bit groups in reverse order
static uint32_t range_reverse(uint32_t bin) noexcept {
    // Stage 1: mirror the four bytes (covers the 16-bit and 8-bit swap steps).
    const uint32_t bytes = __builtin_bswap32(bin);

    // Stage 2: exchange the two nibbles inside every byte.
    const uint32_t upper_nibbles = (bytes << 4) & 0xF0F0F0F0;
    const uint32_t lower_nibbles = (bytes >> 4) & 0x0F0F0F0F;
    const uint32_t nibbles = upper_nibbles | lower_nibbles;

    // Stage 3: exchange the two 2-bit groups inside every nibble.
    const uint32_t upper_pairs = (nibbles << 2) & 0xCCCCCCCC;
    const uint32_t lower_pairs = (nibbles >> 2) & 0x33333333;
    return upper_pairs | lower_pairs;
}
/// Append to `result` every distinct ordering of a multiset of 2-bit codes.
///
/// The multiset holds `n1` copies of `00`, `n2` of `01`, `n3` of `10` and `n4`
/// of `11`. Each ordering is packed most-significant-group first into a 32-bit
/// word and left-aligned, so unused low bits are zero. Orderings are produced
/// in lexicographic order, hence the appended values are strictly ascending.
///
/// @param result  output vector; generated values are appended, nothing is erased
/// @param n1      number of `00` groups
/// @param n2      number of `01` groups
/// @param n3      number of `10` groups
/// @param n4      number of `11` groups
///
/// If any count is negative or the counts do not fit into 16 groups, nothing is
/// appended (the packed value could not be represented in 32 bits).
void build(std::vector<uint32_t> &result, int n1, int n2, int n3, int n4) {
    // Validate each count first so the sum below cannot overflow.
    for (const int count : {n1, n2, n3, n4}) {
        if (count < 0 || count > 16) {
            return;
        }
    }
    const int total = n1 + n2 + n3 + n4;
    if (total > 16) {
        return;
    }

    std::vector<uint32_t> codes;
    codes.reserve(total);
    const auto append = [&codes](int count, uint32_t code) {
        codes.insert(codes.end(), static_cast<std::vector<uint32_t>::size_type>(count), code);
    };
    append(n1, 0b00);
    append(n2, 0b01);
    append(n3, 0b10);
    append(n4, 0b11);

    // Number of unused low bits; equals 32 only when there are no groups at all.
    const int shift = 32 - total * 2;

    do {
        uint32_t packed = 0;
        for (const uint32_t code : codes) {
            packed = (packed << 2) | code;
        }
        // Shifting a 32-bit value by 32 is undefined; with no groups the value is 0 anyway.
        if (shift < 32) {
            packed <<= shift;
        }
        result.emplace_back(packed);
    } while (std::next_permutation(codes.begin(), codes.end()));
}
/// Merge two adjacent ascending runs, `[begin, kk)` and `[kk, end)`, so that the
/// whole range `[begin, end)` becomes ascending.
///
/// The merged sequence is assembled in a temporary `uint32_t` buffer and then
/// copied back over the input. Either run may be empty.
///
/// @tparam T     random-access iterator (the code computes `end - begin`)
/// @param begin  start of the first run
/// @param end    end of the second run
/// @param kk     boundary between the two runs
template <class T>
void sort(T begin, T end, T kk) {
    std::vector<uint32_t> merged(end - begin);
    // std::merge handles empty runs, which the manual loop could not.
    std::merge(begin, kk, kk, end, merged.begin());
    std::copy(merged.begin(), merged.end(), begin);
}
/// Merge two adjacent ascending runs, `[begin, mid)` and `[mid, end)`, in place.
///
/// Only the left run is copied to a temporary buffer; the merged output is
/// written straight back into `[begin, end)`. Either run may be empty, in which
/// case the range is already ordered and nothing is done.
///
/// @tparam ITER_T  iterator over values convertible to/from `uint32_t`
/// @param begin    start of the left run
/// @param end      end of the right run
/// @param mid      boundary between the two runs
template<class ITER_T>
void sort_v2(ITER_T begin, ITER_T end, ITER_T mid) {
    if (begin == mid || mid == end) {
        return;
    }
    const std::vector<uint32_t> left(begin, mid);
    auto lhs = left.begin();
    auto rhs = mid;
    auto out = begin;
    while (lhs != left.end() && rhs != end) {
        if (*lhs < *rhs) {
            *out = *lhs;
            ++lhs;
        } else {
            *out = *rhs;
            ++rhs;
        }
        ++out;
    }
    // If the left run finished first, the rest of the right run is already in
    // place (out == rhs). Otherwise flush what remains of the buffered left run.
    std::copy(lhs, left.end(), out);
}
/// Merge two adjacent ascending runs, `[begin, mid)` and `[mid, end)`, in place.
///
/// The left run is copied to a temporary buffer and the merged output is
/// written back starting at `begin`. The loop tests only the cursor that just
/// advanced, so each step costs a single end-of-run check. Either run may be
/// empty, in which case the range is already ordered and nothing is done.
///
/// @tparam ITER_T  iterator over values convertible to/from `uint32_t`
/// @param begin    start of the left run (also the output cursor)
/// @param end      end of the right run
/// @param mid      boundary between the runs (also the right-run cursor)
template<class ITER_T>
void sort_v3(ITER_T begin, ITER_T end, ITER_T mid) {
    // The loop below dereferences both cursors unconditionally.
    if (begin == mid || mid == end) {
        return;
    }
    // Parentheses select the iterator-range constructor unambiguously.
    const std::vector<uint32_t> left(begin, mid);
    auto cursor = left.begin();
    const auto left_end = left.end();
    for (;;) {
        if (*cursor < *mid) {
            *begin = *cursor;
            ++begin;
            ++cursor;
            if (cursor == left_end) {
                return; // the rest of the right run is already in its final place
            }
        } else {
            *begin = *mid;
            ++begin;
            ++mid;
            if (mid == end) {
                std::copy(cursor, left_end, begin); // flush the buffered left run
                return;
            }
        }
    }
}
/// Generate all basic ranges, merge them into one ascending sequence, mirror the
/// 2-bit groups of every value and print each value as eight upper-case hex
/// digits, one per line, on stdout.
///
/// Each `build` call appends one ascending run (it enumerates permutations in
/// lexicographic order). The runs are then combined by repeated pairwise merging.
void demo() {
    using offset_t = std::vector<uint32_t>::difference_type;
    using index_t = std::vector<offset_t>::size_type;

    std::vector<uint32_t> result;
    result.reserve(7311921); // expected total number of generated values

    // Run boundaries are kept as offsets rather than iterators: appending to
    // `result` invalidates its past-the-end iterator (and every iterator if the
    // vector ever reallocates), while offsets stay valid regardless.
    std::vector<offset_t> bounds;
    bounds.push_back(0);

    for (int n = 0; n <= 7; ++n) { // number of 1x2 and 2x1 block -> 0 ~ 7
        for (int n_2x1 = 0; n_2x1 <= n; ++n_2x1) { // number of 2x1 block -> 0 ~ n
            for (int n_1x1 = 0; n_1x1 <= (14 - n * 2); ++n_1x1) { // number of 1x1 block -> 0 ~ (14 - 2n)
                build(result, 16 - n * 2 - n_1x1, n - n_2x1, n_2x1, n_1x1);
                bounds.push_back(static_cast<offset_t>(result.size()));
            }
        }
    }

    // `result` no longer grows, so iterators derived from it are stable from here on.
    const auto first = result.begin();

    // Each round merges neighbouring runs two by two and drops the boundary
    // between them; a trailing unpaired run is carried over unchanged.
    while (bounds.size() > 2) {
        index_t kept = 0;
        index_t i = 0;
        for (; i + 2 < bounds.size(); i += 2) {
            sort_v3(first + bounds[i], first + bounds[i + 2], first + bounds[i + 1]);
            bounds[kept++] = bounds[i];
        }
        for (; i < bounds.size(); ++i) { // start of an unpaired run (if any) and the final end
            bounds[kept++] = bounds[i];
        }
        bounds.resize(kept);
    }

    for (auto &x : result) {
        x = range_reverse(x);
    }
    for (auto x : result) {
        printf("%08X\n", x);
    }
}

80
src/core/all_cases/basic_ranges.h

@ -0,0 +1,80 @@
#pragma once
/// A valid klotski layout must contain exactly one `2x2` block, which occupies
/// 4 slots; the remaining 16 slots are allocated to space, `1x2`, `2x1` and
/// `1x1`. Then, according to the rules of CommonCode, they are coded as `00`
/// `01` `10` `11` respectively, and the remaining positions are filled with `0`
/// and stored as 32-bit variables.
/// As we all know, a space or `1x1` block will occupy 1 slot, `1x2` or `2x1`
/// block will occupy 2 slots, and together they fill 16 positions, so all
/// possible combinations can be calculated, this number is 204. Each combination
/// can produce different permutations. After verification, there are a total of
/// 7311921 possible permutations. The goal of BasicRanges is to find these
/// permutations, sort them and store them in a `uint32_t` array.
/// In terms of algorithms, there are two options: the first is to generate
/// out-of-order data and then quickly arrange them; the second is to generate
/// ordered data for 204 combinations, and then merge and sort them. After testing,
/// the former is faster in generation (consuming T time), but it will consume
/// more time in sorting (about 7T), and the latter will cost about 2T in
/// generation due to the loss of the tree structure queue. But it can save more
/// time in sorting, which is about 2T, so the second solution will get the result
/// faster.
/// Finally, due to the performance considerations of AllCases, the resulting data
/// will be flipped every two bits, which will not consume too much time (less than
/// 10% of T), but can almost double the speed of the subsequent `check_range`.
#include <mutex>
#include <vector>
#include <cstdint>
void demo();
namespace klotski {

/// Total number of basic ranges.
const uint32_t BASIC_RANGES_SIZE = 7311921;

class BasicRanges {
public:
    /// Container type used to hold the basic ranges.
    using basic_ranges_t = std::vector<uint32_t>;

    /// Build progress; transitions are one-way:
    /// {NOT_INIT} -> {BUILDING} -> {AVAILABLE}
    enum Status {
        NOT_INIT,
        BUILDING,
        AVAILABLE,
    };

    /// How many groups of each 2-bit code a generated range contains.
    struct generate_t {
        int n1; // number of `00` -> space
        int n2; // number of `01` -> 1x2 block
        int n3; // number of `10` -> 2x1 block
        int n4; // number of `11` -> 1x1 block
    };

    /// Start the build process (`NOT_INIT` -> `BUILDING`).
    static void build();

    /// Report the current status of BasicRanges.
    static Status status() noexcept;

    /// Access the constructed data, blocking until it is available.
    static const basic_ranges_t& fetch();

    /// Generate all basic ranges of the type described by `info` into `release`.
    static void generate(basic_ranges_t &release, generate_t info);

private:
    // NOTE(review): the roles below are inferred from the names only — confirm
    // against the implementation file.
    static bool available_;      // presumably set once the data has been built
    static std::mutex building_; // presumably held while a build is in progress
    static basic_ranges_t data_; // presumably the built basic ranges

    static void build_data();
};

} // namespace klotski

43
src/core/main.cc

@ -0,0 +1,43 @@
#include <iostream>
#include <algorithm>
#include "all_cases/basic_ranges.h"
/// Program entry point: run `demo()` once and print the processor time it took,
/// as measured by `clock()`, to stderr in milliseconds.
int main() {
    const auto begin_ticks = clock();

    demo();

    // Convert elapsed clock ticks to milliseconds.
    const auto elapsed_ms = (clock() - begin_ticks) * 1000 / CLOCKS_PER_SEC;
    std::cerr << elapsed_ms << "ms" << std::endl;
    return 0;
}
Loading…
Cancel
Save