From bde7f95e574480a095a973dbd150dc4ab5fe36eb Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sun, 9 Mar 2025 17:49:20 +0800 Subject: [PATCH] feat: black-list filter of first full group builder --- src/core/benchmark/group.cc | 72 ++++++++++++++++- src/core/group/internal/group.cc | 22 ++++- src/core/group/internal/group_cases.cc | 13 ++- src/core/main.cc | 106 ++++++++++++------------- 4 files changed, 147 insertions(+), 66 deletions(-) diff --git a/src/core/benchmark/group.cc b/src/core/benchmark/group.cc index 92c691a..ae9545a 100644 --- a/src/core/benchmark/group.cc +++ b/src/core/benchmark/group.cc @@ -136,11 +136,11 @@ static void GroupExtend(benchmark::State &state) { for (int type_id = 0; type_id < TYPE_ID_LIMIT; ++type_id) { for (auto group : GroupUnion::unsafe_create(type_id).groups()) { - if (group.mirror_type() == Group::MirrorType::Full) { + // if (group.mirror_type() == Group::MirrorType::Full) { // if (group.mirror_type() == Group::MirrorType::Horizontal) { // if (group.mirror_type() == Group::MirrorType::Centro) { // if (group.mirror_type() == Group::MirrorType::Vertical) { - // if (group.mirror_type() == Group::MirrorType::Ordinary) { + if (group.mirror_type() == Group::MirrorType::Ordinary) { // std::println("{} ({})", group.to_string(), group.size()); volatile auto kk = group.cases(); } @@ -451,10 +451,20 @@ static void GroupCasesBuild(benchmark::State &state) { } +static void SingleGroupExtend(benchmark::State &state) { + uint8_t type_id = state.range(0); + auto group = Group::unsafe_create(type_id, 0, Group::Toward::A); + + for (auto _ : state) { + // std::println("{} ({})", group.to_string(), group.size()); + volatile auto kk = group.cases(); + } +} + // BENCHMARK(CommonCodeToTypeId)->Arg(8)->Arg(64)->Arg(256); // BENCHMARK(RawCodeToTypeId)->Arg(8)->Arg(64)->Arg(256); -// BENCHMARK(GroupExtend)->Unit(benchmark::kMillisecond); +BENCHMARK(GroupExtend)->Unit(benchmark::kMillisecond); // BENCHMARK(GroupExtend)->Unit(benchmark::kMicrosecond); // BENCHMARK(FilterFromAllCases)->Unit(benchmark::kMillisecond); @@ -473,7 +483,61 @@ static void GroupCasesBuild(benchmark::State &state) { // BENCHMARK(FastObtainCode); -BENCHMARK(GroupCasesBuild)->Unit(benchmark::kMillisecond); +// BENCHMARK(GroupCasesBuild)->Unit(benchmark::kMillisecond); + +// BENCHMARK(SingleGroupExtend)->Arg(27)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(40)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(50)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(51)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(61)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(62)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(73)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(81)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(82)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(89)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(90)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(91)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(98)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(99)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(100)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(108)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(109)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(115)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(116)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(121)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(122)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(123)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(127)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(128)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(129)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(130)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(134)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(135)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(136)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(137)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(143)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(144)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(148)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(149)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(152)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(153)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(154)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(155)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(156)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(157)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(158)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(159)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(161)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(162)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(163)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(164)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(167)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(168)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(181)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(184)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(185)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(187)->Unit(benchmark::kMicrosecond); +// BENCHMARK(SingleGroupExtend)->Arg(188)->Unit(benchmark::kMicrosecond); // BENCHMARK(IsVerticalMirror); // BENCHMARK(IsHorizontalMirror); diff --git a/src/core/group/internal/group.cc b/src/core/group/internal/group.cc index b128474..20afce0 100644 --- a/src/core/group/internal/group.cc +++ b/src/core/group/internal/group.cc @@ -180,8 +180,26 @@ RangesUnion Group::cases() const { if (GroupUnion::unsafe_create(type_id_).group_num() == 1) { return GroupUnion::unsafe_create(type_id_).cases(); } - if (pattern_id_ == 0 && mirror_type() == MirrorType::Full) { // TODO: black-list filter - return first_x_group(type_id_); + + // + 116: 3321us vs 3162us + // - 149: 989us vs 1406us + // - 154: 4257us vs 6521us + // + 159: 12801us vs 11804us + // - 164: 13067us vs 13974us + // - 181: 206us vs 613us + // - 184: 170us vs 935us + // - 185: 733us vs 3674us + // - 187: 391us vs 530us + // - 188: 587us vs 3155us + + if (pattern_id_ == 0 && mirror_type() == MirrorType::Full) { + + // TODO: it seems that this filter has a negative impact on non-full group cases performance. + // if (type_id_ != 149 && type_id_ != 154 && type_id_ != 164 && type_id_ != 181 && type_id_ != 184 && type_id_ != 185 && type_id_ != 187 && type_id_ != 188) { + // if (type_id_ != 149 && type_id_ != 154 && type_id_ != 164 && type_id_ < 181) { + return first_x_group(type_id_); + // } + } const auto seed_val = PATTERN_DATA[flat_id()] >> 23; diff --git a/src/core/group/internal/group_cases.cc b/src/core/group/internal/group_cases.cc index be101ea..b01321b 100644 --- a/src/core/group/internal/group_cases.cc +++ b/src/core/group/internal/group_cases.cc @@ -347,20 +347,19 @@ static std::vector build_tmp_data() { } void GroupCases::build() { - // if (fast_) { - // return; - // } - // std::lock_guard guard {busy_}; + if (fast_) { + return; + } + std::lock_guard guard {busy_}; // TODO: make `data` as class member - static auto data_array = build_ru_array(); ru_data_array = &data_array; static auto data_2 = build_tmp_data(); rev_data = &data_2; - // KLSK_MEM_BARRIER; - // fast_ = true; + KLSK_MEM_BARRIER; // TODO: should we move it to the end? + fast_ = true; } void GroupCases::build_async(Executor &&executor, Notifier &&callback) { diff --git a/src/core/main.cc b/src/core/main.cc index caf7f86..27adde5 100644 --- a/src/core/main.cc +++ b/src/core/main.cc @@ -71,59 +71,59 @@ int main() { // std::println("{}: {}", kk.to_string(), kk.size()); // } - // 27-0x -> 0.960 (11284) - // 40-0x -> 0.992 (12012) - // 50-0x -> 0.999 (112640) - // 51-0x -> 0.890 (33792) - // 61-0x -> 0.996 (229680) - // 62-0x -> 0.955 (68904) - // 73-0x -> 0.982 (38412) - // 81-0x -> 0.992 (131040) - // 82-0x -> 0.786 (49140) - // 89-0x -> 0.999 (667800) - // 90-0x -> 0.989 (381600) - // 91-0x -> 0.892 (143100) - // 98-0x -> 0.998 (710220) - // 99-0x -> 0.993 (405840) - // 100-0x -> 0.942 (152190) - // 108-0x -> 0.999 (158400) - // 109-0x -> 0.968 (59400) - // 115-0x -> 0.973 (73920) - // 116-0x -> 0.641 (36960) - // 121-0x -> 0.998 (327600) - // 122-0x -> 0.969 (262080) - // 123-0x -> 0.801 (131040) - // 127-0x -> 0.998 (405888) - // 128-0x -> 0.994 (507360) - // 129-0x -> 0.976 (405888) - // 130-0x -> 0.865 (202944) - // 134-0x -> 0.999 (299264) - // 135-0x -> 0.997 (374080) - // 136-0x -> 0.990 (299264) - // 137-0x -> 0.909 (149632) - // 143-0x -> 0.998 (91392) - // 144-0x -> 0.956 (45696) - // 148-0x -> 0.933 (18960) - // 149-0x -> 0.554 (14220) - // 152-0x -> 0.988 (51660) - // 153-0x -> 0.888 (68880) - // 154-0x -> 0.558 (51660) - // 155-0x -> 0.989 (7120) - // 156-0x -> 0.989 (42720) - // 157-0x -> 0.978 (106800) - // 158-0x -> 0.930 (142400) - // 159-0x -> 0.762 (106800) - // 161-0x -> 0.977 (43704) - // 162-0x -> 0.960 (109260) - // 163-0x -> 0.928 (145680) - // 164-0x -> 0.744 (109260) - // 167-0x -> 0.997 (65880) - // 168-0x -> 0.990 (87840) - // 181-0x -> 0.584 (3084) - // 184-0x -> 0.350 (4288) - // 185-0x -> 0.313 (17152) - // 187-0x -> 0.933 (3196) - // 188-0x -> 0.342 (12784) + // 27-0x -> 0.960 (11284) | 1361 vs 348 us (3.911) + // 40-0x -> 0.992 (12012) | 1539 vs 367 us (4.193) + // 50-0x -> 0.999 (112640) | 17146 vs 3399 us (5.044) + // 51-0x -> 0.890 (33792) | 4154 vs 1482 us (2.803) + // 61-0x -> 0.996 (229680) | 36364 vs 8225 us (4.421) + // 62-0x -> 0.955 (68904) | 9691 vs 2941 us (3.295) + // 73-0x -> 0.982 (38412) | 5603 vs 1525 us (3.674) + // 81-0x -> 0.992 (131040) | 19927 vs 4811 us (4.142) + // 82-0x -> 0.786 (49140) | 5534 vs 3044 us (1.818) + // 89-0x -> 0.999 (667800) | 137581 vs 27786 us (4.951) + // 90-0x -> 0.989 (381600) | 63847 vs 16757 us (3.810) + // 91-0x -> 0.892 (143100) | 19233 vs 8284 us (2.322) + // 98-0x -> 0.998 (710220) | 149224 vs 33134 us (4.504) + // 99-0x -> 0.993 (405840) | 69654 vs 19636 us (3.547) + // 100-0x -> 0.942 (152190) | 22446 vs 8742 us (2.568) + // 108-0x -> 0.999 (158400) | 26240 vs 7689 us (3.413) + // 109-0x -> 0.968 (59400) | 9063 vs 3308 us (2.740) + // 115-0x -> 0.973 (73920) | 10601 vs 3349 us (3.165) + // 116-0x -> 0.641 (36960) | 3245 vs 3181 us (1.020) !!! + // 121-0x -> 0.998 (327600) | 55342 vs 16942 us (3.267) + // 122-0x -> 0.969 (262080) | 39966 vs 14853 us (2.691) + // 123-0x -> 0.801 (131040) | 15959 vs 10343 us (1.543) + // 127-0x -> 0.998 (405888) | 72385 vs 24294 us (2.980) + // 128-0x -> 0.994 (507360) | 90921 vs 30098 us (3.021) + // 129-0x -> 0.976 (405888) | 67859 vs 25681 us (2.642) + // 130-0x -> 0.865 (202944) | 28119 vs 16330 us (1.722) + // 134-0x -> 0.999 (299264) | 54399 vs 19285 us (2.821) + // 135-0x -> 0.997 (374080) | 67157 vs 24329 us (2.760) + // 136-0x -> 0.990 (299264) | 51483 vs 20071 us (2.565) + // 137-0x -> 0.909 (149632) | 22062 vs 12102 us (1.823) + // 143-0x -> 0.998 (91392) | 14670 vs 6050 us (2.425) + // 144-0x -> 0.956 (45696) | 7011 vs 3441 us (2.037) + // 148-0x -> 0.933 (18960) | 2362 vs 1053 us (2.243) + // 149-0x -> 0.554 (14220) | 975 vs 1422 us (0.686) !!! + // 152-0x -> 0.988 (51660) | 7476 vs 3708 us (2.016) + // 153-0x -> 0.888 (68880) | 9009 vs 5698 us (1.581) + // 154-0x -> 0.558 (51660) | 4253 vs 6546 us (0.650) !!! + // 155-0x -> 0.989 (7120) | 945 vs 708 us (1.335) + // 156-0x -> 0.989 (42720) | 6524 vs 3593 us (1.816) + // 157-0x -> 0.978 (106800) | 16333 vs 8605 us (1.898) + // 158-0x -> 0.930 (142400) | 20658 vs 12092 us (1.708) + // 159-0x -> 0.762 (106800) | 12675 vs 11828 us (1.072) !!! + // 161-0x -> 0.977 (43704) | 6775 vs 4351 us (1.557) + // 162-0x -> 0.960 (109260) | 17078 vs 10237 us (1.668) + // 163-0x -> 0.928 (145680) | 21689 vs 14229 us (1.524) + // 164-0x -> 0.744 (109260) | 13018 vs 13974 us (0.932) !!! + // 167-0x -> 0.997 (65880) | 10657 vs 6014 us (1.772) + // 168-0x -> 0.990 (87840) | 14110 vs 7916 us (1.782) + // 181-0x -> 0.584 (3084) | 200 vs 615 us (0.325) !!! + // 184-0x -> 0.350 (4288) | 167 vs 935 us (0.179) !!! + // 185-0x -> 0.313 (17152) | 723 vs 3639 us (0.199) !!! + // 187-0x -> 0.933 (3196) | 378 vs 543 us (0.696) !!! + // 188-0x -> 0.342 (12784) | 586 vs 3145 us (0.186) !!! // raw -> ~2170ms // perf-a -> ~1315ms