From b913ba501c550c508dfdf116544abb91f7eed780 Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sun, 20 Oct 2024 18:12:48 +0800 Subject: [PATCH] feat: add fast calculate support --- CMakeLists.txt | 11 ++++- src/core/CMakeLists.txt | 5 ++ src/core/benchmark/fast_cal.cc | 23 ++++++++++ src/core/fast_cal/fast_cal.h | 4 ++ src/core/fast_cal/internal/demo.cc | 73 ++++++++++++++++++++++++++++++ src/core/main.cc | 72 ++++++++++++++++++----------- src/core/utils/common.h | 2 + src/core/utils/utility.h | 7 ++- 8 files changed, 169 insertions(+), 28 deletions(-) create mode 100644 src/core/benchmark/fast_cal.cc create mode 100644 src/core/fast_cal/internal/demo.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index afbc06f..1528471 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,16 @@ if (NOT CMAKE_BUILD_TYPE) endif() add_compile_options(-Wall -Wextra) -add_compile_options(-flto=full) # TODO: enabled by LTO option + +# TODO: enabled by LTO option +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + add_compile_options(-flto=full) +else () + add_compile_options(-flto) # TODO: only for g++ +endif () + +# TODO: for python-ffi +#set(CMAKE_POSITION_INDEPENDENT_CODE ON) get_filename_component(KLSK_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} ABSOLUTE) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 6f30ee5..51d599a 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -22,6 +22,7 @@ set(KLOTSKI_CORE_SRC fast_cal/internal/cal_core.cc fast_cal/internal/fast_cal.cc + fast_cal/internal/demo.cc group/internal/group_union.cc group/internal/extend.cc @@ -62,3 +63,7 @@ target_link_libraries(bm_ranges PRIVATE klotski::core benchmark::benchmark_main) add_executable(bm_utility benchmark/utility.cc) target_compile_options(bm_utility PRIVATE -fno-rtti -fno-exceptions) target_link_libraries(bm_utility PRIVATE klotski::core benchmark::benchmark_main) + +add_executable(bm_fast_cal benchmark/fast_cal.cc) +target_compile_options(bm_fast_cal PRIVATE -fno-rtti -fno-exceptions) +target_link_libraries(bm_fast_cal PRIVATE klotski::core benchmark::benchmark_main) diff --git a/src/core/benchmark/fast_cal.cc b/src/core/benchmark/fast_cal.cc new file mode 100644 index 0000000..52109ce --- /dev/null +++ b/src/core/benchmark/fast_cal.cc @@ -0,0 +1,23 @@ +#include + +#include "fast_cal/fast_cal.h" + +#include "raw_code/raw_code.h" + +using klotski::codec::CommonCode; + +static void FastCalBenchmark(benchmark::State &state) { + + auto code = CommonCode::unsafe_create(0x1A9BF0C00).to_raw_code(); + + for (auto _ : state) { + // auto fc = FastCal(code); + // benchmark::DoNotOptimize(fc.solve()); + benchmark::DoNotOptimize(FastCal_demo(code)); + } + +} + +BENCHMARK(FastCalBenchmark)->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/src/core/fast_cal/fast_cal.h b/src/core/fast_cal/fast_cal.h index 73de499..9111d0d 100644 --- a/src/core/fast_cal/fast_cal.h +++ b/src/core/fast_cal/fast_cal.h @@ -13,6 +13,8 @@ #include "mover/mover.h" #include "raw_code/raw_code.h" +// #include + using klotski::codec::RawCode; using klotski::mover::MaskMover; @@ -63,3 +65,5 @@ private: inline MaskMover init(uint64_t code); void new_case(uint64_t code, uint64_t mask); }; + +RawCode FastCal_demo(RawCode code); diff --git a/src/core/fast_cal/internal/demo.cc b/src/core/fast_cal/internal/demo.cc new file mode 100644 index 0000000..27be62d --- /dev/null +++ b/src/core/fast_cal/internal/demo.cc @@ -0,0 +1,73 @@ +#include "fast_cal/fast_cal.h" + +#include + +#include + +#include + +#include "mover/mover.h" +#include "group/group.h" + +using klotski::codec::RawCode; +using klotski::codec::CommonCode; +using klotski::cases::RangesUnion; + +using klotski::mover::MaskMover; +using klotski::cases::GroupUnion; + +struct data_t { + uint64_t mask; + uint64_t back; +}; + +// TODO: try using `std::vector` + offset instead of `std::queue` + +RawCode FastCal_demo(RawCode raw_code) { + std::queue cache; + absl::flat_hash_map cases; // + auto reserve = GroupUnion::from_raw_code(raw_code).max_group_size(); + cases.reserve(reserve); + + auto core = MaskMover([&cache, &cases](uint64_t code, uint64_t mask) { + if (const auto match = cases.find(code); match != cases.end()) { + match->second.mask |= mask; // update mask + return; + } + cases.emplace(code, data_t { + .mask = mask, + .back = cache.front().unwrap(), + }); + cache.emplace(RawCode::unsafe_create(code)); + }); + + cache.emplace(raw_code); + cases.emplace(raw_code, data_t {0, 0}); // without mask + + while (!cache.empty()) { + if (((cache.front().unwrap() >> 39) & 0b111) == 0b100) { + break; + } + uint64_t curr = cache.front().unwrap(); + core.next_cases(curr, cases.find(curr)->second.mask); + cache.pop(); + } + +// std::vector path; +// auto code = cache.front().unwrap(); +// while (true) { +// if (code == 0) { +// break; +// } +// path.emplace_back(RawCode::unsafe_create(code)); +// code = cases.find(code)->second.back; +// } +// std::reverse(path.begin(), path.end()); +// for (auto step : path) { +// std::cout << step << std::endl; +// } +// std::cout << path.size() << std::endl; + + return cache.front(); + +} diff --git a/src/core/main.cc b/src/core/main.cc index 4d68c03..87f5dc3 100644 --- a/src/core/main.cc +++ b/src/core/main.cc @@ -42,6 +42,22 @@ int main() { const auto start = std::chrono::system_clock::now(); + auto code = CommonCode::unsafe_create(0x1A9BF0C00).to_raw_code(); + for (int i = 0; i < 100; ++i) { +// FastCal fc {code}; +// fc.solve(); +// klotski::cases::Group_extend(code); + FastCal_demo(code); + } + +// for (int i = 0; i < 10000000; ++i) { +// MaskMover mover([](uint64_t code, uint64_t mask) { +// volatile auto tmp_1 = code; +// volatile auto tmp_2 = mask; +// }); +// mover.next_cases(0x1A9BF0C00, 0); +// } + // ShortCode::speed_up(true); // // std::unordered_set data_r; @@ -59,38 +75,38 @@ int main() { // std::cout << data_s.size() << std::endl; // std::cout << data_c.size() << std::endl; - auto group_union = GroupUnion::unsafe_create(169); - std::cout << group_union << std::endl; +// auto group_union = GroupUnion::unsafe_create(169); +// std::cout << group_union << std::endl; - auto group = Group::from_common_code(CommonCode::unsafe_create(0x1A9BF0C00)); - std::cout << group << std::endl; +// auto group = Group::from_common_code(CommonCode::unsafe_create(0x1A9BF0C00)); +// std::cout << group << std::endl; // std::cout << group.type_id() << std::endl; // std::cout << group.pattern_id() << std::endl; // std::cout << (int)group.toward() << std::endl; // std::cout << group.toward_char() << std::endl; // std::cout << group.to_string() << std::endl; - auto info_1 = GroupCases::obtain_info(CommonCode::unsafe_create(0x1A9BF0C00)); - std::cout << info_1 << std::endl; - auto code_1 = GroupCases::obtain_code(info_1); - std::cout << code_1 << std::endl; - - auto info_2 = GroupCases::obtain_info(CommonCode::unsafe_create(0x1A9BF0C00).to_short_code()); - std::cout << info_2 << std::endl; - auto code_2 = GroupCases::obtain_code(info_2); - std::cout << code_2 << std::endl; - - GroupCases::build(); - - auto info_3 = GroupCases::obtain_info(CommonCode::unsafe_create(0x1A9BF0C00)); - std::cout << info_3 << std::endl; - auto code_3 = GroupCases::obtain_code(info_3); - std::cout << code_3 << std::endl; - - auto info_4 = GroupCases::obtain_info(CommonCode::unsafe_create(0x1A9BF0C00).to_short_code()); - std::cout << info_4 << std::endl; - auto code_4 = GroupCases::obtain_code(info_4); - std::cout << code_4 << std::endl; +// auto info_1 = GroupCases::obtain_info(CommonCode::unsafe_create(0x1A9BF0C00)); +// std::cout << info_1 << std::endl; +// auto code_1 = GroupCases::obtain_code(info_1); +// std::cout << code_1 << std::endl; +// +// auto info_2 = GroupCases::obtain_info(CommonCode::unsafe_create(0x1A9BF0C00).to_short_code()); +// std::cout << info_2 << std::endl; +// auto code_2 = GroupCases::obtain_code(info_2); +// std::cout << code_2 << std::endl; +// +// GroupCases::build(); +// +// auto info_3 = GroupCases::obtain_info(CommonCode::unsafe_create(0x1A9BF0C00)); +// std::cout << info_3 << std::endl; +// auto code_3 = GroupCases::obtain_code(info_3); +// std::cout << code_3 << std::endl; +// +// auto info_4 = GroupCases::obtain_info(CommonCode::unsafe_create(0x1A9BF0C00).to_short_code()); +// std::cout << info_4 << std::endl; +// auto code_4 = GroupCases::obtain_code(info_4); +// std::cout << code_4 << std::endl; // const auto common_code = CommonCode::unsafe_create(0x1A9BF0C00); // const auto group = Group::from_common_code(common_code); @@ -135,7 +151,11 @@ int main() { // std::cout << gp.size() << std::endl; // std::cout << (int)gp.mirror_type() << std::endl; - std::cerr << std::chrono::system_clock::now() - start << std::endl; +#if defined(__clang__) + std::cerr << (std::chrono::system_clock::now() - start).count() / 1000 << "ms" << std::endl; +#elif defined(__GNUC__) + std::cerr << (std::chrono::system_clock::now() - start).count() / 1000000 << "ms" << std::endl; +#endif // auto core = Core([](const uint64_t code, uint64_t) { // std::cout << RawCode::unsafe_create(code); diff --git a/src/core/utils/common.h b/src/core/utils/common.h index 7f5025b..4d46012 100644 --- a/src/core/utils/common.h +++ b/src/core/utils/common.h @@ -19,6 +19,8 @@ #include +// TODO: using constexpr + /// NOTE: 0b101 and 0b110 are reserved #define BLOCK_space 0b000 #define BLOCK_fill 0b111 diff --git a/src/core/utils/utility.h b/src/core/utils/utility.h index b04348a..73bf1bb 100644 --- a/src/core/utils/utility.h +++ b/src/core/utils/utility.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -22,7 +23,11 @@ } /// Marking compiler assumptions. -#define KLSK_ASSUME(expr) __builtin_assume(expr) +#if defined(__clang__) + #define KLSK_ASSUME(expr) __builtin_assume(expr) +#elif defined(__GNUC__) + #define KLSK_ASSUME(expr) [[assume(expr)]] +#endif /// Force function declaration to be inline. #define KLSK_INLINE __attribute__ ((always_inline))