Browse Source

perf: build and filter with pattern cases

master
Dnomd343 1 week ago
parent
commit
b02c898133
  1. 1
      src/core/benchmark/group.cc
  2. 3
      src/core/group/group.h
  3. 181
      src/core/group/internal/group.cc
  4. 23
      src/core/group/internal/group_union.cc
  5. 111
      src/core/main.cc
  6. 30
      src/core/ranges/internal/ranges.cc
  7. 3
      src/core/ranges/ranges.h

1
src/core/benchmark/group.cc

@ -149,6 +149,7 @@ static void GroupExtend(benchmark::State &state) {
constexpr auto group = Group::unsafe_create(89, 0, Group::Toward::A);
// constexpr auto group = Group::unsafe_create(51, 0, Group::Toward::A);
// constexpr auto group = Group::unsafe_create(98, 0, Group::Toward::A);
volatile auto kk = group.cases();

3
src/core/group/group.h

@ -204,6 +204,9 @@ public:
/// Get all klotski cases under the current type id.
[[nodiscard]] cases::RangesUnion cases() const;
/// Get the klotski cases under the current type id, leaving out the ones listed in `data`.
/// NOTE(review): the filter walks `data` with a single forward cursor per head, so each
/// head of `data` presumably has to be sorted ascending — confirm against the callers.
// TODO: only for perf right now
[[nodiscard]] cases::RangesUnion cases_without(const cases::RangesUnion &data) const;
/// Get the group instance with the specified pattern id.
[[nodiscard]] std::optional<Groups> groups(uint_least16_t pattern_id) const;

181
src/core/group/internal/group.cc

@ -106,32 +106,182 @@ static RangesUnion extend_type_x(RawCode seed, size_t reserve) {
});
}
KLSK_NOINLINE static void spawn_full_pattern(RawCode seed, const size_t reserve, RangesUnion &output) {
std::vector<RawCode> codes;
phmap::flat_hash_map<RawCode, uint64_t> cases; // <code, hint>
codes.reserve(reserve);
cases.reserve(static_cast<size_t>(reserve * 1.56));
auto core = MaskMover([&codes, &cases](RawCode code, uint64_t hint) {
if (const auto [iter, ret] = cases.try_emplace(code, hint); !ret) {
iter->second |= hint; // update hint
return;
}
codes.emplace_back(code);
});
uint64_t offset = 0;
codes.emplace_back(seed);
cases.emplace(seed, 0); // without hint
while (offset != codes.size()) {
const auto curr = codes[offset++];
core.next_cases(curr, cases.find(curr)->second);
}
for (auto raw_code : codes) {
const auto code = raw_code.to_common_code().unwrap();
output.ranges(code >> 32).emplace_back(static_cast<uint32_t>(code));
}
}
KLSK_NOINLINE static void spawn_hor_pattern(RawCode seed, const size_t reserve, RangesUnion &output) {
std::vector<RawCode> codes;
phmap::flat_hash_map<RawCode, uint64_t> cases; // <code, hint>
codes.reserve(reserve);
cases.reserve(static_cast<size_t>(reserve * 1.56));
auto core = MaskMover([&codes, &cases](RawCode code, uint64_t hint) {
if (const auto [iter, ret] = cases.try_emplace(code, hint); !ret) {
iter->second |= hint; // update hint
return;
}
codes.emplace_back(code);
});
uint64_t offset = 0;
codes.emplace_back(seed);
cases.emplace(seed, 0); // without hint
while (offset != codes.size()) {
const auto curr = codes[offset++];
core.next_cases(curr, cases.find(curr)->second);
}
for (auto raw_code : codes) {
const auto code = raw_code.to_common_code().unwrap();
output.ranges(code >> 32).emplace_back(static_cast<uint32_t>(code));
const auto code_ = raw_code.to_vertical_mirror().to_common_code().unwrap();
output.ranges(code_ >> 32).emplace_back(static_cast<uint32_t>(code_));
}
}
KLSK_NOINLINE static void spawn_ver_pattern(RawCode seed, const size_t reserve, RangesUnion &output) {
std::vector<RawCode> codes;
phmap::flat_hash_map<RawCode, uint64_t> cases; // <code, hint>
codes.reserve(reserve);
cases.reserve(static_cast<size_t>(reserve * 1.56));
auto core = MaskMover([&codes, &cases](RawCode code, uint64_t hint) {
if (const auto [iter, ret] = cases.try_emplace(code, hint); !ret) {
iter->second |= hint; // update hint
return;
}
codes.emplace_back(code);
});
uint64_t offset = 0;
codes.emplace_back(seed);
cases.emplace(seed, 0); // without hint
while (offset != codes.size()) {
const auto curr = codes[offset++];
core.next_cases(curr, cases.find(curr)->second);
}
for (auto raw_code : codes) {
const auto code = raw_code.to_common_code().unwrap();
output.ranges(code >> 32).emplace_back(static_cast<uint32_t>(code));
const auto code_ = raw_code.to_horizontal_mirror().to_common_code().unwrap();
output.ranges(code_ >> 32).emplace_back(static_cast<uint32_t>(code_));
}
}
KLSK_NOINLINE static void spawn_ord_pattern(RawCode seed, const size_t reserve, RangesUnion &output) {
std::vector<RawCode> codes;
phmap::flat_hash_map<RawCode, uint64_t> cases; // <code, hint>
codes.reserve(reserve);
cases.reserve(static_cast<size_t>(reserve * 1.56));
auto core = MaskMover([&codes, &cases](RawCode code, uint64_t hint) {
if (const auto [iter, ret] = cases.try_emplace(code, hint); !ret) {
iter->second |= hint; // update hint
return;
}
codes.emplace_back(code);
});
uint64_t offset = 0;
codes.emplace_back(seed);
cases.emplace(seed, 0); // without hint
while (offset != codes.size()) {
const auto curr = codes[offset++];
core.next_cases(curr, cases.find(curr)->second);
}
for (auto raw_code : codes) {
const auto code = raw_code.to_common_code().unwrap();
output.ranges(code >> 32).emplace_back(static_cast<uint32_t>(code));
const auto code_1 = raw_code.to_vertical_mirror().to_common_code().unwrap();
output.ranges(code_1 >> 32).emplace_back(static_cast<uint32_t>(code_1));
const auto code_2 = raw_code.to_horizontal_mirror().to_common_code().unwrap();
output.ranges(code_2 >> 32).emplace_back(static_cast<uint32_t>(code_2));
const auto code_3 = raw_code.to_diagonal_mirror().to_common_code().unwrap();
output.ranges(code_3 >> 32).emplace_back(static_cast<uint32_t>(code_3));
}
}
// Build the cases of pattern 0 of `type_id` by elimination: collect the cases of every
// other pattern (pattern_id >= 1) into `others`, then return whatever the whole group
// union contains that is not in `others`.
//
// NOTE(review): this span is a rendered diff without +/- markers, so removed and added
// lines appear interleaved. As shown, the `groups` loop has no closing brace and there
// are two consecutive `return` statements; read it against the real file before editing.
KLSK_NOINLINE static RangesUnion extend_pro(uint8_t type_id) {
// std::cout << (int)type_id << std::endl;
// auto groups = GroupUnion::unsafe_create(type_id).groups();
// RangesUnion others {};
// for (size_t index = 1; index < groups.size(); ++index) {
// others += groups[index].cases();
// }
auto groups = GroupUnion::unsafe_create(type_id).groups();
RangesUnion others {};
for (size_t index = 1; index < groups.size(); ++index) {
others += groups[index].cases();
auto gu = GroupUnion::unsafe_create(type_id);
// Pattern 0 is skipped on purpose: it is the one being computed by elimination.
for (int pattern_id = 1; pattern_id < gu.pattern_num(); ++pattern_id) {
auto flat_id = klotski::group::PATTERN_OFFSET[type_id] + pattern_id;
// PATTERN_DATA entry as decoded here: low 3 bits -> mirror type,
// next 20 bits -> size passed on as the reserve hint, bits from 23 up -> seed common code.
auto mirror_type = static_cast<Group::MirrorType>(PATTERN_DATA[flat_id] & 0b111);
auto seed = CommonCode::unsafe_create(PATTERN_DATA[flat_id] >> 23).to_raw_code();
auto size = (PATTERN_DATA[flat_id] >> 3) & 0xFFFFF;
if (mirror_type == Group::MirrorType::Full) {
spawn_full_pattern(seed, size, others);
} else if (mirror_type == Group::MirrorType::Horizontal) {
spawn_hor_pattern(seed, size, others);
} else if (mirror_type == Group::MirrorType::Vertical) {
spawn_ver_pattern(seed, size, others);
} else if (mirror_type == Group::MirrorType::Centro) {
// NOTE(review): Centro patterns contribute nothing to `others`, so their cases would
// remain in the result — presumably such patterns never occur here; confirm.
// std::abort();
} else {
spawn_ord_pattern(seed, size, others);
}
}
// std::cout << others.size() << std::endl;
auto all = GroupUnion::unsafe_create(type_id).cases();
// std::cout << all.size() << std::endl;
RangesUnion result {};
for (auto head : RangesUnion::Heads) {
// The spawn_* helpers append in discovery order, so each head is sorted before the
// sorted-range difference below. NOTE(review): `all` is assumed to be sorted already.
std::stable_sort(others.ranges(head).begin(), others.ranges(head).end());
std::set_difference(all.ranges(head).begin(), all.ranges(head).end(),
others.ranges(head).begin(), others.ranges(head).end(),
std::back_inserter(result.ranges(head)));
}
// std::cout << result.size() << std::endl;
// auto all = GroupUnion::unsafe_create(type_id).cases();
//
// RangesUnion result {};
// for (auto head : RangesUnion::Heads) {
// std::set_difference(all.ranges(head).begin(), all.ranges(head).end(),
// others.ranges(head).begin(), others.ranges(head).end(),
// std::back_inserter(result.ranges(head)));
// }
// return result;
return result;
// NOTE(review): as rendered, the statement below is unreachable (it follows `return result;`).
return GroupUnion::unsafe_create(type_id).cases_without(others);
}
RangesUnion Group::cases() const {
@ -145,6 +295,7 @@ RangesUnion Group::cases() const {
// }
if (pattern_id_ == 0 && mirror_type() == MirrorType::Full) { // TODO: black-list filter
return extend_pro(type_id_);
// return GroupUnion::unsafe_create(type_id_).cases();
}
auto seed = CommonCode::unsafe_create(PATTERN_DATA[flat_id()] >> 23).to_raw_code();

23
src/core/group/internal/group_union.cc

@ -9,6 +9,8 @@ using klotski::cases::BASIC_RANGES_NUM;
// Helper macros for the per-head boilerplate below. They expand in place and therefore
// rely on locals with fixed names at the point of use: `ranges` (the reversed basic
// ranges), `cases` (the RangesUnion being filled) and, for the *_WITHOUT form, `data`.

// Derive the legal ranges of HEAD from `ranges` into `cases`.
#define RANGE_DERIVE(HEAD) ranges.derive(HEAD, cases.ranges(HEAD))
// Pre-size the HEAD bucket of `cases` to SIZE entries.
#define RANGE_RESERVE(HEAD, SIZE) cases.ranges(HEAD).reserve(SIZE)
// Same as RANGE_DERIVE, but values listed in the HEAD bucket of `data` are left out.
#define RANGE_DERIVE_WITHOUT(HEAD) ranges.derive_without(HEAD, cases.ranges(HEAD), data.ranges(HEAD))
RangesUnion GroupUnion::cases() const {
Ranges ranges {};
ranges.reserve(BASIC_RANGES_NUM[type_id_]);
@ -29,3 +31,24 @@ RangesUnion GroupUnion::cases() const {
RANGE_DERIVE(0xC); RANGE_DERIVE(0xD); RANGE_DERIVE(0xE);
return cases;
}
/// Build the cases of this type id, leaving out every case that is listed in `data`.
///
/// The basic ranges are spawned and reversed once, then each of the 12 heads
/// (0x0-0x2, 0x4-0x6, 0x8-0xA, 0xC-0xE) is derived from them while skipping the values
/// found in the matching head of `data`.
RangesUnion GroupUnion::cases_without(const cases::RangesUnion &data) const {
    Ranges basic {};
    basic.reserve(BASIC_RANGES_NUM[type_id_]);
    const auto [n, n_2x1, n_1x1] = BLOCK_NUM[type_id_];
    basic.spawn(n, n_2x1, n_1x1);
    basic.reverse();

    RangesUnion result;
    const auto [na, nb, nc, nd] = GROUP_UNION_CASES_NUM[type_id_];

    // Heads come in rows of three: both outer heads of a row share one size, the
    // middle head has its own. Sizes are the full (unfiltered) counts.
    const auto reserve_row = [&result](const int first, const auto outer, const auto middle) {
        result.ranges(first).reserve(outer);
        result.ranges(first + 1).reserve(middle);
        result.ranges(first + 2).reserve(outer);
    };
    reserve_row(0x0, na, nb);
    reserve_row(0x4, nc, nd);
    reserve_row(0x8, nc, nd);
    reserve_row(0xC, na, nb);

    // Derive the three heads of one row, filtering each against `data`.
    const auto derive_row = [&basic, &result, &data](const int first) {
        for (int head = first; head < first + 3; ++head) {
            basic.derive_without(head, result.ranges(head), data.ranges(head));
        }
    };
    derive_row(0x0);
    derive_row(0x4);
    derive_row(0x8);
    derive_row(0xC);
    return result;
}

111
src/core/main.cc

@ -70,59 +70,64 @@ int main() {
// std::println("{}: {}", kk.to_string(), kk.size());
// }
// 0.960 27-0x
// 0.992 40-0x
// 0.999 50-0x
// 0.890 51-0x
// 0.996 61-0x
// 0.955 62-0x
// 0.982 73-0x
// 0.992 81-0x
// 0.786 82-0x
// 0.999 89-0x
// 0.989 90-0x
// 0.892 91-0x
// 0.998 98-0x
// 0.993 99-0x
// 0.942 100-0x
// 0.999 108-0x
// 0.968 109-0x
// 0.973 115-0x
// 0.641 116-0x
// 0.998 121-0x
// 0.969 122-0x
// 0.801 123-0x
// 0.998 127-0x
// 0.994 128-0x
// 0.976 129-0x
// 0.865 130-0x
// 0.999 134-0x
// 0.997 135-0x
// 0.990 136-0x
// 0.909 137-0x
// 0.998 143-0x
// 0.956 144-0x
// 0.933 148-0x
// 0.554 149-0x
// 0.988 152-0x
// 0.888 153-0x
// 0.558 154-0x
// 0.989 155-0x
// 0.989 156-0x
// 0.978 157-0x
// 0.930 158-0x
// 0.762 159-0x
// 0.977 161-0x
// 0.960 162-0x
// 0.928 163-0x
// 0.744 164-0x
// 0.997 167-0x
// 0.990 168-0x
// 0.584 181-0x
// 0.350 184-0x
// 0.313 185-0x
// 0.933 187-0x
// 0.342 188-0x
// 27-0x -> 0.960 (11284)
// 40-0x -> 0.992 (12012)
// 50-0x -> 0.999 (112640)
// 51-0x -> 0.890 (33792)
// 61-0x -> 0.996 (229680)
// 62-0x -> 0.955 (68904)
// 73-0x -> 0.982 (38412)
// 81-0x -> 0.992 (131040)
// 82-0x -> 0.786 (49140)
// 89-0x -> 0.999 (667800)
// 90-0x -> 0.989 (381600)
// 91-0x -> 0.892 (143100)
// 98-0x -> 0.998 (710220)
// 99-0x -> 0.993 (405840)
// 100-0x -> 0.942 (152190)
// 108-0x -> 0.999 (158400)
// 109-0x -> 0.968 (59400)
// 115-0x -> 0.973 (73920)
// 116-0x -> 0.641 (36960)
// 121-0x -> 0.998 (327600)
// 122-0x -> 0.969 (262080)
// 123-0x -> 0.801 (131040)
// 127-0x -> 0.998 (405888)
// 128-0x -> 0.994 (507360)
// 129-0x -> 0.976 (405888)
// 130-0x -> 0.865 (202944)
// 134-0x -> 0.999 (299264)
// 135-0x -> 0.997 (374080)
// 136-0x -> 0.990 (299264)
// 137-0x -> 0.909 (149632)
// 143-0x -> 0.998 (91392)
// 144-0x -> 0.956 (45696)
// 148-0x -> 0.933 (18960)
// 149-0x -> 0.554 (14220)
// 152-0x -> 0.988 (51660)
// 153-0x -> 0.888 (68880)
// 154-0x -> 0.558 (51660)
// 155-0x -> 0.989 (7120)
// 156-0x -> 0.989 (42720)
// 157-0x -> 0.978 (106800)
// 158-0x -> 0.930 (142400)
// 159-0x -> 0.762 (106800)
// 161-0x -> 0.977 (43704)
// 162-0x -> 0.960 (109260)
// 163-0x -> 0.928 (145680)
// 164-0x -> 0.744 (109260)
// 167-0x -> 0.997 (65880)
// 168-0x -> 0.990 (87840)
// 181-0x -> 0.584 (3084)
// 184-0x -> 0.350 (4288)
// 185-0x -> 0.313 (17152)
// 187-0x -> 0.933 (3196)
// 188-0x -> 0.342 (12784)
// raw -> ~2170ms
// perf-a -> ~1315ms
// perf-b -> ~1311ms
// perf-c -> ~1272ms
for (uint8_t type_id = 0; type_id < TYPE_ID_LIMIT; ++type_id) {
for (auto group: GroupUnion::unsafe_create(type_id).groups()) {

30
src/core/ranges/internal/ranges.cc

@ -36,3 +36,33 @@ void Ranges::derive(const int head, Ranges &output) const {
output.emplace_back(range_reverse((*this)[index])); // release valid case
}
}
/// Derive the legal ranges for `head` from the reversed ranges, skipping every value
/// that is listed in `data`.
///
/// @param head    head value the ranges are checked against.
/// @param output  receives the surviving values, appended in iteration order.
/// @param data    values to leave out; must be sorted ascending, because it is consumed
///                with a single forward cursor while the derived values are produced.
///
/// The skip-ahead logic below already relies on `range_reverse((*this)[i])` growing with
/// `i`, so this is effectively a sorted-range difference with the same semantics as
/// `std::set_difference`: entries of `data` that are duplicated or never derived are
/// passed over instead of blocking the cursor.
void Ranges::derive_without(int head, Ranges &output, const Ranges &data) const {
    if (size() == 0) {
        return; // nothing to derive, and `back()` below would be invalid
    }
    size_t data_index = 0;
    const uint32_t max_val = range_reverse(this->back());
    for (uint32_t index = 0; index < size(); ++index) {
        if (const auto offset = check(head, (*this)[index])) { // invalid case
            ///         !! <- broken
            /// ( xx xx xx ) xx xx xx ... [reversed range]
            ///         +1   00 00 00 ...     (delta)
            const uint32_t delta = 1U << (32 - offset * 2); // distance to next possible range
            const auto min_next = delta + (range_reverse((*this)[index]) & ~(delta - 1));
            if (min_next > max_val) {
                break; // index has overflowed
            }
            while (range_reverse((*this)[++index]) < min_next) {} // located next range
            --index; // compensate for the loop's own ++index
            continue;
        }

        const auto tmp = range_reverse((*this)[index]);
        // Drop stale filter entries (duplicates, or values that are never derived) so a
        // single unexpected entry cannot disable filtering for the rest of the ranges.
        while (data_index < data.size() && data[data_index] < tmp) {
            ++data_index;
        }
        if (data_index < data.size() && data[data_index] == tmp) {
            ++data_index; // filtered out
        } else {
            output.emplace_back(tmp); // release valid case
        }
    }
}

3
src/core/ranges/ranges.h

@ -58,6 +58,9 @@ public:
/// Derive the legal ranges from reversed ranges with specified head.
void derive(int head, Ranges &output) const;
/// Derive the legal ranges from reversed ranges with specified head, leaving out the
/// values listed in `data`.
/// NOTE(review): `data` is consumed with a single forward cursor, so it presumably has
/// to be sorted in the same order the ranges are derived — confirm against the callers.
// TODO: only for perf right now
void derive_without(int head, Ranges &output, const Ranges &data) const;
/// Check whether the combination of head and reversed range is valid.
static KLSK_INLINE_CE int check(int head, uint32_t range);

Loading…
Cancel
Save