diff --git a/src/core/benchmark/group.cc b/src/core/benchmark/group.cc index d3a8b2b..47fb2e2 100644 --- a/src/core/benchmark/group.cc +++ b/src/core/benchmark/group.cc @@ -149,6 +149,7 @@ static void GroupExtend(benchmark::State &state) { constexpr auto group = Group::unsafe_create(89, 0, Group::Toward::A); // constexpr auto group = Group::unsafe_create(51, 0, Group::Toward::A); + // constexpr auto group = Group::unsafe_create(98, 0, Group::Toward::A); volatile auto kk = group.cases(); diff --git a/src/core/group/group.h b/src/core/group/group.h index b92fc9b..f593009 100644 --- a/src/core/group/group.h +++ b/src/core/group/group.h @@ -204,6 +204,9 @@ public: /// Get all klotski cases under the current type id. [[nodiscard]] cases::RangesUnion cases() const; + // TODO: only for perf right now + [[nodiscard]] cases::RangesUnion cases_without(const cases::RangesUnion &data) const; + /// Get the group instance with the specified pattern id. [[nodiscard]] std::optional groups(uint_least16_t pattern_id) const; diff --git a/src/core/group/internal/group.cc b/src/core/group/internal/group.cc index 8082c5a..a2eb8dd 100644 --- a/src/core/group/internal/group.cc +++ b/src/core/group/internal/group.cc @@ -106,32 +106,182 @@ static RangesUnion extend_type_x(RawCode seed, size_t reserve) { }); } +KLSK_NOINLINE static void spawn_full_pattern(RawCode seed, const size_t reserve, RangesUnion &output) { + std::vector codes; + phmap::flat_hash_map cases; // + + codes.reserve(reserve); + cases.reserve(static_cast(reserve * 1.56)); + + auto core = MaskMover([&codes, &cases](RawCode code, uint64_t hint) { + if (const auto [iter, ret] = cases.try_emplace(code, hint); !ret) { + iter->second |= hint; // update hint + return; + } + codes.emplace_back(code); + }); + + uint64_t offset = 0; + codes.emplace_back(seed); + cases.emplace(seed, 0); // without hint + while (offset != codes.size()) { + const auto curr = codes[offset++]; + core.next_cases(curr, cases.find(curr)->second); + } + + for (auto raw_code : codes) { + const auto code = raw_code.to_common_code().unwrap(); + output.ranges(code >> 32).emplace_back(static_cast(code)); + } +} + +KLSK_NOINLINE static void spawn_hor_pattern(RawCode seed, const size_t reserve, RangesUnion &output) { + std::vector codes; + phmap::flat_hash_map cases; // + + codes.reserve(reserve); + cases.reserve(static_cast(reserve * 1.56)); + + auto core = MaskMover([&codes, &cases](RawCode code, uint64_t hint) { + if (const auto [iter, ret] = cases.try_emplace(code, hint); !ret) { + iter->second |= hint; // update hint + return; + } + codes.emplace_back(code); + }); + + uint64_t offset = 0; + codes.emplace_back(seed); + cases.emplace(seed, 0); // without hint + while (offset != codes.size()) { + const auto curr = codes[offset++]; + core.next_cases(curr, cases.find(curr)->second); + } + + for (auto raw_code : codes) { + const auto code = raw_code.to_common_code().unwrap(); + output.ranges(code >> 32).emplace_back(static_cast(code)); + + const auto code_ = raw_code.to_vertical_mirror().to_common_code().unwrap(); + output.ranges(code_ >> 32).emplace_back(static_cast(code_)); + } +} + +KLSK_NOINLINE static void spawn_ver_pattern(RawCode seed, const size_t reserve, RangesUnion &output) { + std::vector codes; + phmap::flat_hash_map cases; // + + codes.reserve(reserve); + cases.reserve(static_cast(reserve * 1.56)); + + auto core = MaskMover([&codes, &cases](RawCode code, uint64_t hint) { + if (const auto [iter, ret] = cases.try_emplace(code, hint); !ret) { + iter->second |= hint; // update hint + return; + } + codes.emplace_back(code); + }); + + uint64_t offset = 0; + codes.emplace_back(seed); + cases.emplace(seed, 0); // without hint + while (offset != codes.size()) { + const auto curr = codes[offset++]; + core.next_cases(curr, cases.find(curr)->second); + } + + for (auto raw_code : codes) { + const auto code = raw_code.to_common_code().unwrap(); + output.ranges(code >> 32).emplace_back(static_cast(code)); + + const auto code_ = raw_code.to_horizontal_mirror().to_common_code().unwrap(); + output.ranges(code_ >> 32).emplace_back(static_cast(code_)); + } +} + +KLSK_NOINLINE static void spawn_ord_pattern(RawCode seed, const size_t reserve, RangesUnion &output) { + std::vector codes; + phmap::flat_hash_map cases; // + + codes.reserve(reserve); + cases.reserve(static_cast(reserve * 1.56)); + + auto core = MaskMover([&codes, &cases](RawCode code, uint64_t hint) { + if (const auto [iter, ret] = cases.try_emplace(code, hint); !ret) { + iter->second |= hint; // update hint + return; + } + codes.emplace_back(code); + }); + + uint64_t offset = 0; + codes.emplace_back(seed); + cases.emplace(seed, 0); // without hint + while (offset != codes.size()) { + const auto curr = codes[offset++]; + core.next_cases(curr, cases.find(curr)->second); + } + + for (auto raw_code : codes) { + const auto code = raw_code.to_common_code().unwrap(); + output.ranges(code >> 32).emplace_back(static_cast(code)); + + const auto code_1 = raw_code.to_vertical_mirror().to_common_code().unwrap(); + output.ranges(code_1 >> 32).emplace_back(static_cast(code_1)); + + const auto code_2 = raw_code.to_horizontal_mirror().to_common_code().unwrap(); + output.ranges(code_2 >> 32).emplace_back(static_cast(code_2)); + + const auto code_3 = raw_code.to_diagonal_mirror().to_common_code().unwrap(); + output.ranges(code_3 >> 32).emplace_back(static_cast(code_3)); + } +} + KLSK_NOINLINE static RangesUnion extend_pro(uint8_t type_id) { - // std::cout << (int)type_id << std::endl; + // auto groups = GroupUnion::unsafe_create(type_id).groups(); + // RangesUnion others {}; + // for (size_t index = 1; index < groups.size(); ++index) { + // others += groups[index].cases(); + // } - auto groups = GroupUnion::unsafe_create(type_id).groups(); RangesUnion others {}; - for (size_t index = 1; index < groups.size(); ++index) { - others += groups[index].cases(); + auto gu = GroupUnion::unsafe_create(type_id); + for (int pattern_id = 1; pattern_id < gu.pattern_num(); ++pattern_id) { + + auto flat_id = klotski::group::PATTERN_OFFSET[type_id] + pattern_id; + auto mirror_type = static_cast(PATTERN_DATA[flat_id] & 0b111); + auto seed = CommonCode::unsafe_create(PATTERN_DATA[flat_id] >> 23).to_raw_code(); + auto size = (PATTERN_DATA[flat_id] >> 3) & 0xFFFFF; + + if (mirror_type == Group::MirrorType::Full) { + spawn_full_pattern(seed, size, others); + } else if (mirror_type == Group::MirrorType::Horizontal) { + spawn_hor_pattern(seed, size, others); + } else if (mirror_type == Group::MirrorType::Vertical) { + spawn_ver_pattern(seed, size, others); + } else if (mirror_type == Group::MirrorType::Centro) { + // std::abort(); + } else { + spawn_ord_pattern(seed, size, others); + } } - // std::cout << others.size() << std::endl; - auto all = GroupUnion::unsafe_create(type_id).cases(); - // std::cout << all.size() << std::endl; - - RangesUnion result {}; for (auto head : RangesUnion::Heads) { std::stable_sort(others.ranges(head).begin(), others.ranges(head).end()); - - std::set_difference(all.ranges(head).begin(), all.ranges(head).end(), - others.ranges(head).begin(), others.ranges(head).end(), - std::back_inserter(result.ranges(head))); } - // std::cout << result.size() << std::endl; + // auto all = GroupUnion::unsafe_create(type_id).cases(); + // + // RangesUnion result {}; + // for (auto head : RangesUnion::Heads) { + // std::set_difference(all.ranges(head).begin(), all.ranges(head).end(), + // others.ranges(head).begin(), others.ranges(head).end(), + // std::back_inserter(result.ranges(head))); + // } + // return result; - return result; + return GroupUnion::unsafe_create(type_id).cases_without(others); } RangesUnion Group::cases() const { @@ -145,6 +295,7 @@ RangesUnion Group::cases() const { // } if (pattern_id_ == 0 && mirror_type() == MirrorType::Full) { // TODO: black-list filter return extend_pro(type_id_); + // return GroupUnion::unsafe_create(type_id_).cases(); } auto seed = CommonCode::unsafe_create(PATTERN_DATA[flat_id()] >> 23).to_raw_code(); diff --git a/src/core/group/internal/group_union.cc b/src/core/group/internal/group_union.cc index 87b0b93..c6181f6 100644 --- a/src/core/group/internal/group_union.cc +++ b/src/core/group/internal/group_union.cc @@ -9,6 +9,8 @@ using klotski::cases::BASIC_RANGES_NUM; #define RANGE_DERIVE(HEAD) ranges.derive(HEAD, cases.ranges(HEAD)) #define RANGE_RESERVE(HEAD, SIZE) cases.ranges(HEAD).reserve(SIZE) +#define RANGE_DERIVE_WITHOUT(HEAD) ranges.derive_without(HEAD, cases.ranges(HEAD), data.ranges(HEAD)) + RangesUnion GroupUnion::cases() const { Ranges ranges {}; ranges.reserve(BASIC_RANGES_NUM[type_id_]); @@ -29,3 +31,24 @@ RangesUnion GroupUnion::cases() const { RANGE_DERIVE(0xC); RANGE_DERIVE(0xD); RANGE_DERIVE(0xE); return cases; } + +RangesUnion GroupUnion::cases_without(const cases::RangesUnion &data) const { + Ranges ranges {}; + ranges.reserve(BASIC_RANGES_NUM[type_id_]); + const auto [n, n_2x1, n_1x1] = BLOCK_NUM[type_id_]; + ranges.spawn(n, n_2x1, n_1x1); + ranges.reverse(); + + RangesUnion cases; + const auto [na, nb, nc, nd] = GROUP_UNION_CASES_NUM[type_id_]; + RANGE_RESERVE(0x0, na); RANGE_RESERVE(0x1, nb); RANGE_RESERVE(0x2, na); + RANGE_RESERVE(0x4, nc); RANGE_RESERVE(0x5, nd); RANGE_RESERVE(0x6, nc); + RANGE_RESERVE(0x8, nc); RANGE_RESERVE(0x9, nd); RANGE_RESERVE(0xA, nc); + RANGE_RESERVE(0xC, na); RANGE_RESERVE(0xD, nb); RANGE_RESERVE(0xE, na); + + RANGE_DERIVE_WITHOUT(0x0); RANGE_DERIVE_WITHOUT(0x1); RANGE_DERIVE_WITHOUT(0x2); + RANGE_DERIVE_WITHOUT(0x4); RANGE_DERIVE_WITHOUT(0x5); RANGE_DERIVE_WITHOUT(0x6); + RANGE_DERIVE_WITHOUT(0x8); RANGE_DERIVE_WITHOUT(0x9); RANGE_DERIVE_WITHOUT(0xA); + RANGE_DERIVE_WITHOUT(0xC); RANGE_DERIVE_WITHOUT(0xD); RANGE_DERIVE_WITHOUT(0xE); + return cases; +} diff --git a/src/core/main.cc b/src/core/main.cc index baa3a91..87143d4 100644 --- a/src/core/main.cc +++ b/src/core/main.cc @@ -70,59 +70,64 @@ int main() { // std::println("{}: {}", kk.to_string(), kk.size()); // } - // 0.960 27-0x - // 0.992 40-0x - // 0.999 50-0x - // 0.890 51-0x - // 0.996 61-0x - // 0.955 62-0x - // 0.982 73-0x - // 0.992 81-0x - // 0.786 82-0x - // 0.999 89-0x - // 0.989 90-0x - // 0.892 91-0x - // 0.998 98-0x - // 0.993 99-0x - // 0.942 100-0x - // 0.999 108-0x - // 0.968 109-0x - // 0.973 115-0x - // 0.641 116-0x - // 0.998 121-0x - // 0.969 122-0x - // 0.801 123-0x - // 0.998 127-0x - // 0.994 128-0x - // 0.976 129-0x - // 0.865 130-0x - // 0.999 134-0x - // 0.997 135-0x - // 0.990 136-0x - // 0.909 137-0x - // 0.998 143-0x - // 0.956 144-0x - // 0.933 148-0x - // 0.554 149-0x - // 0.988 152-0x - // 0.888 153-0x - // 0.558 154-0x - // 0.989 155-0x - // 0.989 156-0x - // 0.978 157-0x - // 0.930 158-0x - // 0.762 159-0x - // 0.977 161-0x - // 0.960 162-0x - // 0.928 163-0x - // 0.744 164-0x - // 0.997 167-0x - // 0.990 168-0x - // 0.584 181-0x - // 0.350 184-0x - // 0.313 185-0x - // 0.933 187-0x - // 0.342 188-0x + // 27-0x -> 0.960 (11284) + // 40-0x -> 0.992 (12012) + // 50-0x -> 0.999 (112640) + // 51-0x -> 0.890 (33792) + // 61-0x -> 0.996 (229680) + // 62-0x -> 0.955 (68904) + // 73-0x -> 0.982 (38412) + // 81-0x -> 0.992 (131040) + // 82-0x -> 0.786 (49140) + // 89-0x -> 0.999 (667800) + // 90-0x -> 0.989 (381600) + // 91-0x -> 0.892 (143100) + // 98-0x -> 0.998 (710220) + // 99-0x -> 0.993 (405840) + // 100-0x -> 0.942 (152190) + // 108-0x -> 0.999 (158400) + // 109-0x -> 0.968 (59400) + // 115-0x -> 0.973 (73920) + // 116-0x -> 0.641 (36960) + // 121-0x -> 0.998 (327600) + // 122-0x -> 0.969 (262080) + // 123-0x -> 0.801 (131040) + // 127-0x -> 0.998 (405888) + // 128-0x -> 0.994 (507360) + // 129-0x -> 0.976 (405888) + // 130-0x -> 0.865 (202944) + // 134-0x -> 0.999 (299264) + // 135-0x -> 0.997 (374080) + // 136-0x -> 0.990 (299264) + // 137-0x -> 0.909 (149632) + // 143-0x -> 0.998 (91392) + // 144-0x -> 0.956 (45696) + // 148-0x -> 0.933 (18960) + // 149-0x -> 0.554 (14220) + // 152-0x -> 0.988 (51660) + // 153-0x -> 0.888 (68880) + // 154-0x -> 0.558 (51660) + // 155-0x -> 0.989 (7120) + // 156-0x -> 0.989 (42720) + // 157-0x -> 0.978 (106800) + // 158-0x -> 0.930 (142400) + // 159-0x -> 0.762 (106800) + // 161-0x -> 0.977 (43704) + // 162-0x -> 0.960 (109260) + // 163-0x -> 0.928 (145680) + // 164-0x -> 0.744 (109260) + // 167-0x -> 0.997 (65880) + // 168-0x -> 0.990 (87840) + // 181-0x -> 0.584 (3084) + // 184-0x -> 0.350 (4288) + // 185-0x -> 0.313 (17152) + // 187-0x -> 0.933 (3196) + // 188-0x -> 0.342 (12784) + + // raw -> ~2170ms + // perf-a -> ~1315ms + // perf-b -> ~1311ms + // perf-c -> ~1272ms for (uint8_t type_id = 0; type_id < TYPE_ID_LIMIT; ++type_id) { for (auto group: GroupUnion::unsafe_create(type_id).groups()) { diff --git a/src/core/ranges/internal/ranges.cc b/src/core/ranges/internal/ranges.cc index 39b522c..190d329 100644 --- a/src/core/ranges/internal/ranges.cc +++ b/src/core/ranges/internal/ranges.cc @@ -36,3 +36,33 @@ void Ranges::derive(const int head, Ranges &output) const { output.emplace_back(range_reverse((*this)[index])); // release valid case } } + +void Ranges::derive_without(int head, Ranges &output, const Ranges &data) const { + + size_t data_index = 0; + + const uint32_t max_val = range_reverse(this->back()); + for (uint32_t index = 0; index < size(); ++index) { + if (const auto offset = check(head, (*this)[index])) { // invalid case + /// !! <- broken + /// ( xx xx xx ) xx xx xx ... [reversed range] + /// +1 00 00 00 ... (delta) + const uint32_t delta = 1U << (32 - offset * 2); // distance to next possible range + const auto min_next = delta + (range_reverse((*this)[index]) & ~(delta - 1)); + if (min_next > max_val) { + break; // index has overflowed + } + while (range_reverse((*this)[++index]) < min_next) {} // located next range + --index; + continue; + } + auto tmp = range_reverse((*this)[index]); + if (data_index < data.size() && tmp == data[data_index]) { + ++data_index; + } else { + output.emplace_back(tmp); + } + + // output.emplace_back(range_reverse((*this)[index])); // release valid case + } +} diff --git a/src/core/ranges/ranges.h b/src/core/ranges/ranges.h index 2a680b7..bf310a3 100644 --- a/src/core/ranges/ranges.h +++ b/src/core/ranges/ranges.h @@ -58,6 +58,9 @@ public: /// Derive the legal ranges from reversed ranges with specified head. void derive(int head, Ranges &output) const; + // TODO: only for perf right now + void derive_without(int head, Ranges &output, const Ranges &data) const; + /// Check whether the combination of head and reversed range is valid. static KLSK_INLINE_CE int check(int head, uint32_t range);