Browse Source

feat: add benchmark and perf of Analyse

master
Dnomd343 2 weeks ago
parent
commit
a130a2b98e
  1. 5
      src/core/CMakeLists.txt
  2. 68
      src/core/analyse/analyse.h
  3. 18
      src/core/analyse/internal/analyse_pro.cc
  4. 79
      src/core/analyse/internal/layer_queue.inl
  5. 60
      src/core/analyse/layer_queue.h
  6. 27
      src/core/benchmark/analyse.cc
  7. 4
      src/core/fast_cal/layer_queue.h
  8. 16
      src/core/main.cc

5
src/core/CMakeLists.txt

@ -97,6 +97,7 @@ set(KLSK_CORE_SRC
analyse/analyse.cc analyse/analyse.cc
analyse/backtrack.cc analyse/backtrack.cc
analyse/internal/analyse_pro.cc
) )
add_library(klotski_core STATIC ${KLSK_CORE_SRC}) add_library(klotski_core STATIC ${KLSK_CORE_SRC})
@ -150,4 +151,8 @@ if (KLSK_ENABLE_BENCHMARK)
add_executable(bm_klsk_fast_cal benchmark/fast_cal.cc) add_executable(bm_klsk_fast_cal benchmark/fast_cal.cc)
target_compile_options(bm_klsk_fast_cal PRIVATE ${KLSK_BENCHMARK_OPTS}) target_compile_options(bm_klsk_fast_cal PRIVATE ${KLSK_BENCHMARK_OPTS})
target_link_libraries(bm_klsk_fast_cal PRIVATE ${KLSK_BENCHMARK_LIBS}) target_link_libraries(bm_klsk_fast_cal PRIVATE ${KLSK_BENCHMARK_LIBS})
add_executable(bm_klsk_analyse benchmark/analyse.cc)
target_compile_options(bm_klsk_analyse PRIVATE ${KLSK_BENCHMARK_OPTS})
target_link_libraries(bm_klsk_analyse PRIVATE ${KLSK_BENCHMARK_LIBS})
endif() endif()

68
src/core/analyse/analyse.h

@ -8,15 +8,20 @@
#include <queue> #include <queue>
#include <cstdint> #include <cstdint>
#include <functional> #include <functional>
#include <print>
#include <unordered_map> #include <unordered_map>
#include "mover/mover.h" #include "mover/mover.h"
#include "raw_code/raw_code.h" #include "raw_code/raw_code.h"
#include "group/group.h"
#include "layer_queue.h"
#include <parallel_hashmap/phmap.h>
namespace klotski { namespace klotski {
// TODO: try double or 4-times size // TODO: try double or 4-times size
const uint32_t ANY_MAP_RESERVE = 65536; const uint32_t ANY_MAP_RESERVE = 65537; // non-prime numbers cause performance issue of libc++
// TODO: Analyse enter klotski namespace later // TODO: Analyse enter klotski namespace later
using namespace klotski; using namespace klotski;
@ -50,12 +55,11 @@ private:
uint64_t root; uint64_t root;
std::queue<analyse_t*> cache; std::queue<analyse_t*> cache;
std::unordered_map<uint64_t, analyse_t> cases; std::unordered_map<uint64_t, analyse_t> cases; // addr of analyse_t will be stable
inline mover::MaskMover init(uint64_t code); inline mover::MaskMover init(uint64_t code);
void new_case(uint64_t code, uint64_t mask); void new_case(uint64_t code, uint64_t mask);
/// backtrack definitions /// backtrack definitions
public: public:
struct track_t { struct track_t {
@ -74,4 +78,62 @@ public:
// TODO: RawCode enable `hash` and `equal_to` trait in namespace std // TODO: RawCode enable `hash` and `equal_to` trait in namespace std
}; };
namespace analyse {
/// Layer-by-layer analyser: starting from one case, it records for every
/// reached case its mask, its step (layer index) and the cases of the
/// previous layer that produced it.
class AnalysePro {
public:
    AnalysePro() = delete;

    /// Start the search from `code`, which becomes the only case of step 0.
    ///
    /// Both the queue capacity and the map reservation are derived from
    /// `GroupUnion::from_raw_code(code).max_group_size()`.
    /// NOTE(review): the 1.56 factor is presumably an over-reservation tuned
    /// for the hash map load factor — confirm.
    explicit AnalysePro(codec::RawCode code)
            : seeker_({code}, group::GroupUnion::from_raw_code(code).max_group_size()) {
        const auto reserve = group::GroupUnion::from_raw_code(code).max_group_size();
        cases_.reserve(static_cast<size_t>(reserve * 1.56));
        cases_.emplace(code, info_t {0, 0, {}});
    }

    /// Run the search until the queue is exhausted.
    void build_all();

private:
    /// Hand the current queue head and its stored mask to the mover, then
    /// advance the queue to the next case.
    KLSK_INLINE void spawn_next(mover::MaskMover &mover) {
        const auto curr = seeker_.current();
        mover.next_cases(curr, cases_.find(curr)->second.mask);
        seeker_.next();
    }

    /// Record `code` as reachable from the current queue head.
    ///
    /// @return `true` if `code` was unknown and has been inserted and queued,
    ///         `false` if it was already recorded.
    ///
    /// When `code` is already recorded with exactly the step of the layer
    /// being generated, the incoming mask is merged into it and the current
    /// head is appended as an additional source.
    KLSK_INLINE bool try_emplace(codec::RawCode code, uint64_t mask) {
        // Step assigned to cases generated from the current head.
        const auto next_step = static_cast<int>(seeker_.layer_num() - 2 + 1);

        // Single lookup: either finds the existing entry or inserts a
        // value-initialized one that is filled in below.
        auto [match, inserted] = cases_.try_emplace(code);
        if (!inserted) {
            if (match->second.step == next_step) {
                match->second.mask |= mask; // update mask
                match->second.src.emplace_back(seeker_.current());
            }
            return false;
        }

        match->second.mask = mask;
        match->second.step = next_step;
        match->second.src.emplace_back(seeker_.current());
        seeker_.emplace(code);
        return true;
    }

    /// Per-case record kept in `cases_`.
    struct info_t {
        uint64_t mask;                   // mask passed to the mover for this case
        int step;                        // layer index at which the case was first reached
        std::vector<codec::RawCode> src; // cases of the previous layer leading here
    };

    LayerQueuePro<codec::RawCode> seeker_;
    phmap::flat_hash_map<codec::RawCode, info_t> cases_;
};
} // namespace analyse
} // namespace klotski } // namespace klotski

18
src/core/analyse/internal/analyse_pro.cc

@ -0,0 +1,18 @@
#include "analyse/analyse.h"
#include <print>
using klotski::codec::RawCode;
using klotski::mover::MaskMover;
using klotski::analyse::AnalysePro;
void AnalysePro::build_all() {
auto mover = MaskMover([this](const RawCode code, const uint64_t mask) {
try_emplace(code, mask);
});
while (!seeker_.is_ending()) {
// std::println("layer: {}", seeker_.layer_num());
spawn_next(mover);
}
}

79
src/core/analyse/internal/layer_queue.inl

@ -0,0 +1,79 @@
#pragma once
namespace klotski {
/// Build the queue from the nodes of the first layer.
///
/// @param first_layer nodes that form the initial layer of the queue.
/// @param max_size    requested capacity of the node buffer, in elements.
///
/// The buffer is sized to hold at least the first layer even when `max_size`
/// is smaller. If the allocation fails the process is aborted instead of
/// writing through a null pointer.
template <typename T>
requires std::is_trivial_v<T>
LayerQueuePro<T>::LayerQueuePro(std::initializer_list<T> first_layer, const size_t max_size)
        : layer_end_(first_layer.size()), queue_end_(0) {
    // Set up the layer offsets before taking the raw buffer, so that a
    // failing vector allocation cannot leak `data_`.
    layer_offset_.reserve(232); // TODO: confirm the max layer number
    layer_offset_.emplace_back(layer_end_);

    const size_t capacity = max_size < first_layer.size() ? first_layer.size() : max_size;
    data_ = static_cast<T*>(std::malloc(sizeof(T) * capacity));
    if (data_ == nullptr && capacity != 0) {
        std::abort(); // out of memory: the queue cannot operate without its buffer
    }

    for (const auto node : first_layer) {
        emplace(node);
    }
}
/// Release the node buffer held in `data_`.
template <typename T>
requires std::is_trivial_v<T>
LayerQueuePro<T>::~LayerQueuePro() {
    std::free(this->data_);
}
/// Return a copy of the node at the head of the queue (index `queue_begin_`).
template <typename T>
requires std::is_trivial_v<T>
T LayerQueuePro<T>::current() const {
    const T *head = data_ + queue_begin_;
    return *head;
}
/// Store `node` at index `queue_end_` and advance the end of the queue by one.
template <typename T>
requires std::is_trivial_v<T>
void LayerQueuePro<T>::emplace(T node) {
    data_[queue_end_++] = node;
}
/// Advance the head of the queue by one node. When the head reaches the end
/// of the current layer and the queue is not exhausted, the nodes emplaced so
/// far become the next layer and its end offset is recorded.
template <typename T>
requires std::is_trivial_v<T>
void LayerQueuePro<T>::next() {
    ++queue_begin_;
    if (queue_begin_ != layer_end_ || is_ending()) {
        return; // still inside the current layer, or nothing left in the queue
    }

    layer_begin_ = layer_end_;
    layer_end_ = queue_end_;
    layer_offset_.emplace_back(queue_end_);
}
/// True when the head of the queue has caught up with its end.
template <typename T>
requires std::is_trivial_v<T>
[[nodiscard]] bool LayerQueuePro<T>::is_ending() const {
    return queue_end_ == queue_begin_;
}
// template <typename T>
// requires std::is_trivial_v<T>
// [[nodiscard]] bool LayerQueuePro<T>::is_new_layer() const {
// return queue_begin_ == layer_begin_;
// }
// template <typename T>
// requires std::is_trivial_v<T>
// std::vector<T> LayerQueuePro<T>::last_layer() const {
// return {data_ + layer_begin_, data_ + layer_end_};
// }
// template <typename T>
// requires std::is_trivial_v<T>
// std::vector<std::vector<T>> LayerQueuePro<T>::all_layers() const {
// std::vector<std::vector<T>> result;
// result.reserve(layer_offset_.size() - 1);
// for (size_t i = 0; i < layer_offset_.size() - 1; ++i) {
// result.emplace_back(std::vector<T> {
// data_ + layer_offset_[i],
// data_ + layer_offset_[i + 1]
// });
// }
// return result;
// }
} // namespace klotski

60
src/core/analyse/layer_queue.h

@ -0,0 +1,60 @@
/// Klotski Engine by Dnomd343 @2024
#pragma once
#include <cstddef>
#include <cstdlib>
#include <initializer_list>
#include <type_traits>
#include <vector>
namespace klotski {
/// Layered queue over a raw buffer `data_`: nodes are emplaced at the end,
/// consumed from the front, and the end offset of every finished layer is
/// recorded in `layer_offset_`.
template <typename T>
requires std::is_trivial_v<T>
class LayerQueuePro final {
public:
    ~LayerQueuePro();

    /// Construct from first layer nodes and reserve size.
    LayerQueuePro(std::initializer_list<T> first_layer, size_t max_size);

    /// The queue owns the raw `data_` buffer, so a member-wise copy would end
    /// up releasing the same buffer twice; copying is therefore disabled.
    LayerQueuePro(const LayerQueuePro &) = delete;
    LayerQueuePro &operator=(const LayerQueuePro &) = delete;

    // ------------------------------------------------------------------------------------- //

    /// Pop the head of the queue.
    void next();

    /// Obtain the current working node.
    T current() const;

    /// Emplace new node at the end of the queue.
    void emplace(T node);

    // ------------------------------------------------------------------------------------- //

    /// Whether the queue is empty.
    [[nodiscard]] bool is_ending() const;

    /// Whether the queue front is on new layer.
    // [[nodiscard]] bool is_new_layer() const;

    // ------------------------------------------------------------------------------------- //

    /// Get the nodes of the last layer.
    // std::vector<T> last_layer() const;

    /// Get all the nodes of each layer.
    // std::vector<std::vector<T>> all_layers() const;

    // ------------------------------------------------------------------------------------- //

    /// Number of offsets currently recorded in `layer_offset_`.
    [[nodiscard]] size_t layer_num() const {
        return layer_offset_.size();
    }

private:
    T *data_ {nullptr};                    // node buffer
    size_t layer_begin_ {0}, layer_end_;   // index range of the current layer
    size_t queue_begin_ {0}, queue_end_;   // index range of the pending nodes
    std::vector<size_t> layer_offset_ {0}; // starts with the single offset 0
};
} // namespace klotski
#include "internal/layer_queue.inl"

27
src/core/benchmark/analyse.cc

@ -0,0 +1,27 @@
#include <benchmark/benchmark.h>
#include "analyse/analyse.h"
#include "common_code/common_code.h"
using klotski::Analyse;
using klotski::analyse::AnalysePro;
/// Benchmark body: each iteration constructs an `AnalysePro` from a fixed
/// case and runs `build_all()` on it.
static void AnalyseBenchmark(benchmark::State &state) {
    using klotski::codec::CommonCode;

    const auto code = CommonCode::unsafe_create(0x1A9BF0C00).to_raw_code();

    for (auto _ : state) {
        // auto analyse = Analyse {code};
        // analyse.build();

        AnalysePro analyse {code};
        analyse.build_all();
    }
}
BENCHMARK(AnalyseBenchmark)->Unit(benchmark::kMillisecond);
BENCHMARK_MAIN();

4
src/core/fast_cal/layer_queue.h

@ -44,6 +44,10 @@ public:
// ------------------------------------------------------------------------------------- // // ------------------------------------------------------------------------------------- //
/// Number of entries currently stored in `layer_offset_`.
[[nodiscard]] size_t layer_num() const {
    const size_t count = layer_offset_.size();
    return count;
}
private: private:
T *data_ {nullptr}; T *data_ {nullptr};
size_t layer_begin_ {0}, layer_end_; size_t layer_begin_ {0}, layer_end_;

16
src/core/main.cc

@ -17,6 +17,7 @@
#include <parallel_hashmap/phmap.h> #include <parallel_hashmap/phmap.h>
using klotski::Analyse; using klotski::Analyse;
using klotski::analyse::AnalysePro;
using klotski::mover::S2Mover; using klotski::mover::S2Mover;
using klotski::mover::MaskMover; using klotski::mover::MaskMover;
@ -51,6 +52,11 @@ int main() {
const auto start = std::chrono::system_clock::now(); const auto start = std::chrono::system_clock::now();
// const auto code = CommonCode::unsafe_create(0x1A9BF0C00).to_raw_code();
const auto code = CommonCode::unsafe_create(0x4FEA13400).to_raw_code();
AnalysePro analyse {code};
analyse.build_all();
// TODO: maybe we can support `std::format` // TODO: maybe we can support `std::format`
// auto group = Group::create(169, 0, Group::Toward::C).value(); // auto group = Group::create(169, 0, Group::Toward::C).value();
@ -143,11 +149,11 @@ int main() {
// } // }
// } // }
GroupCases::build(); // GroupCases::build();
//
constexpr auto group = Group::unsafe_create(169, 0, Group::Toward::C); // constexpr auto group = Group::unsafe_create(169, 0, Group::Toward::C);
constexpr auto info = CaseInfo::unsafe_create(group, 7472); // constexpr auto info = CaseInfo::unsafe_create(group, 7472);
std::cout << info << ": " << GroupCases::obtain_code(info) << std::endl; // std::cout << info << ": " << GroupCases::obtain_code(info) << std::endl;
// constexpr auto group = Group::unsafe_create(89, 0, Group::Toward::A); // constexpr auto group = Group::unsafe_create(89, 0, Group::Toward::A);
// std::cout << group.to_string() << std::endl; // std::cout << group.to_string() << std::endl;

Loading…
Cancel
Save