Browse Source

feat: black-list filter of first full group builder

master
Dnomd343 4 weeks ago
parent
commit
bde7f95e57
  1. 72
      src/core/benchmark/group.cc
  2. 22
      src/core/group/internal/group.cc
  3. 13
      src/core/group/internal/group_cases.cc
  4. 106
      src/core/main.cc

72
src/core/benchmark/group.cc

@ -136,11 +136,11 @@ static void GroupExtend(benchmark::State &state) {
for (int type_id = 0; type_id < TYPE_ID_LIMIT; ++type_id) {
for (auto group : GroupUnion::unsafe_create(type_id).groups()) {
if (group.mirror_type() == Group::MirrorType::Full) {
// if (group.mirror_type() == Group::MirrorType::Full) {
// if (group.mirror_type() == Group::MirrorType::Horizontal) {
// if (group.mirror_type() == Group::MirrorType::Centro) {
// if (group.mirror_type() == Group::MirrorType::Vertical) {
// if (group.mirror_type() == Group::MirrorType::Ordinary) {
if (group.mirror_type() == Group::MirrorType::Ordinary) {
// std::println("{} ({})", group.to_string(), group.size());
volatile auto kk = group.cases();
}
@ -451,10 +451,20 @@ static void GroupCasesBuild(benchmark::State &state) {
}
/// Benchmark: time one `Group::cases()` call for a single group.
///
/// The group is built once, outside the timed loop, through
/// `Group::unsafe_create(type_id, 0, Group::Toward::A)`. The `type_id` comes
/// from the first benchmark argument (`state.range(0)`), so each registered
/// `->Arg(N)` measures the group obtained for type id `N`.
static void SingleGroupExtend(benchmark::State &state) {
    // Same narrowing as an implicit conversion, just spelled out.
    uint8_t type_id = static_cast<uint8_t>(state.range(0));
    auto target = Group::unsafe_create(type_id, 0, Group::Toward::A);

    for (auto _ : state) {
        // Debug aid, kept disabled:
        // std::println("{} ({})", target.to_string(), target.size());

        // The result lands in a volatile local, presumably so the optimizer
        // cannot drop the call being measured.
        volatile auto sink = target.cases();
    }
}
// BENCHMARK(CommonCodeToTypeId)->Arg(8)->Arg(64)->Arg(256);
// BENCHMARK(RawCodeToTypeId)->Arg(8)->Arg(64)->Arg(256);
// BENCHMARK(GroupExtend)->Unit(benchmark::kMillisecond);
BENCHMARK(GroupExtend)->Unit(benchmark::kMillisecond);
// BENCHMARK(GroupExtend)->Unit(benchmark::kMicrosecond);
// BENCHMARK(FilterFromAllCases)->Unit(benchmark::kMillisecond);
@ -473,7 +483,61 @@ static void GroupCasesBuild(benchmark::State &state) {
// BENCHMARK(FastObtainCode);
BENCHMARK(GroupCasesBuild)->Unit(benchmark::kMillisecond);
// BENCHMARK(GroupCasesBuild)->Unit(benchmark::kMillisecond);
// BENCHMARK(SingleGroupExtend)->Arg(27)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(40)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(50)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(51)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(61)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(62)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(73)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(81)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(82)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(89)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(90)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(91)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(98)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(99)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(100)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(108)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(109)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(115)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(116)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(121)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(122)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(123)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(127)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(128)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(129)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(130)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(134)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(135)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(136)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(137)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(143)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(144)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(148)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(149)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(152)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(153)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(154)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(155)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(156)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(157)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(158)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(159)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(161)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(162)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(163)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(164)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(167)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(168)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(181)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(184)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(185)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(187)->Unit(benchmark::kMicrosecond);
// BENCHMARK(SingleGroupExtend)->Arg(188)->Unit(benchmark::kMicrosecond);
// BENCHMARK(IsVerticalMirror);
// BENCHMARK(IsHorizontalMirror);

22
src/core/group/internal/group.cc

@ -180,8 +180,26 @@ RangesUnion Group::cases() const {
if (GroupUnion::unsafe_create(type_id_).group_num() == 1) {
return GroupUnion::unsafe_create(type_id_).cases();
}
if (pattern_id_ == 0 && mirror_type() == MirrorType::Full) { // TODO: black-list filter
return first_x_group(type_id_);
// + 116: 3321us vs 3162us
// - 149: 989us vs 1406us
// - 154: 4257us vs 6521us
// + 159: 12801us vs 11804us
// - 164: 13067us vs 13974us
// - 181: 206us vs 613us
// - 184: 170us vs 935us
// - 185: 733us vs 3674us
// - 187: 391us vs 530us
// - 188: 587us vs 3155us
if (pattern_id_ == 0 && mirror_type() == MirrorType::Full) {
// TODO: it seems that this filter has a negative impact on non-full group cases performance.
// if (type_id_ != 149 && type_id_ != 154 && type_id_ != 164 && type_id_ != 181 && type_id_ != 184 && type_id_ != 185 && type_id_ != 187 && type_id_ != 188) {
// if (type_id_ != 149 && type_id_ != 154 && type_id_ != 164 && type_id_ < 181) {
return first_x_group(type_id_);
// }
}
const auto seed_val = PATTERN_DATA[flat_id()] >> 23;

13
src/core/group/internal/group_cases.cc

@ -347,20 +347,19 @@ static std::vector<case_info_t> build_tmp_data() {
}
// Builds the lookup data behind `ru_data_array` and `rev_data`, then marks it
// as ready by setting `fast_`.
//
// Returns immediately when `fast_` is already set. Otherwise it holds a
// `std::lock_guard` on `busy_` for the rest of the call, initializes two
// function-local statics from `build_ru_array()` and `build_tmp_data()`,
// stores their addresses in `ru_data_array` / `rev_data`, and finally sets
// `fast_`.
//
// NOTE(review): the commented-out statements below duplicate the live ones
// (the `fast_` check, the lock, the barrier and the flag store). They look
// like leftovers from toggling this path on and off — confirm before removing.
void GroupCases::build() {
// if (fast_) {
// return;
// }
// std::lock_guard guard {busy_};
// Nothing to do when the data was already published.
if (fast_) {
return;
}
// Held until the function returns.
std::lock_guard guard {busy_};
// TODO: make `data` as class member
// Function-local statics: each initializer runs only on the first execution
// that reaches it; later calls reuse the same objects.
static auto data_array = build_ru_array();
ru_data_array = &data_array;
static auto data_2 = build_tmp_data();
rev_data = &data_2;
// KLSK_MEM_BARRIER;
// fast_ = true;
// Barrier sits between publishing the pointers and setting the flag —
// presumably so the pointer stores are not reordered after `fast_ = true`;
// verify against the definition of KLSK_MEM_BARRIER.
KLSK_MEM_BARRIER; // TODO: should we move it to the end?
fast_ = true;
}
void GroupCases::build_async(Executor &&executor, Notifier &&callback) {

106
src/core/main.cc

@ -71,59 +71,59 @@ int main() {
// std::println("{}: {}", kk.to_string(), kk.size());
// }
// 27-0x -> 0.960 (11284)
// 40-0x -> 0.992 (12012)
// 50-0x -> 0.999 (112640)
// 51-0x -> 0.890 (33792)
// 61-0x -> 0.996 (229680)
// 62-0x -> 0.955 (68904)
// 73-0x -> 0.982 (38412)
// 81-0x -> 0.992 (131040)
// 82-0x -> 0.786 (49140)
// 89-0x -> 0.999 (667800)
// 90-0x -> 0.989 (381600)
// 91-0x -> 0.892 (143100)
// 98-0x -> 0.998 (710220)
// 99-0x -> 0.993 (405840)
// 100-0x -> 0.942 (152190)
// 108-0x -> 0.999 (158400)
// 109-0x -> 0.968 (59400)
// 115-0x -> 0.973 (73920)
// 116-0x -> 0.641 (36960)
// 121-0x -> 0.998 (327600)
// 122-0x -> 0.969 (262080)
// 123-0x -> 0.801 (131040)
// 127-0x -> 0.998 (405888)
// 128-0x -> 0.994 (507360)
// 129-0x -> 0.976 (405888)
// 130-0x -> 0.865 (202944)
// 134-0x -> 0.999 (299264)
// 135-0x -> 0.997 (374080)
// 136-0x -> 0.990 (299264)
// 137-0x -> 0.909 (149632)
// 143-0x -> 0.998 (91392)
// 144-0x -> 0.956 (45696)
// 148-0x -> 0.933 (18960)
// 149-0x -> 0.554 (14220)
// 152-0x -> 0.988 (51660)
// 153-0x -> 0.888 (68880)
// 154-0x -> 0.558 (51660)
// 155-0x -> 0.989 (7120)
// 156-0x -> 0.989 (42720)
// 157-0x -> 0.978 (106800)
// 158-0x -> 0.930 (142400)
// 159-0x -> 0.762 (106800)
// 161-0x -> 0.977 (43704)
// 162-0x -> 0.960 (109260)
// 163-0x -> 0.928 (145680)
// 164-0x -> 0.744 (109260)
// 167-0x -> 0.997 (65880)
// 168-0x -> 0.990 (87840)
// 181-0x -> 0.584 (3084)
// 184-0x -> 0.350 (4288)
// 185-0x -> 0.313 (17152)
// 187-0x -> 0.933 (3196)
// 188-0x -> 0.342 (12784)
// 27-0x -> 0.960 (11284) | 1361 vs 348 us (3.911)
// 40-0x -> 0.992 (12012) | 1539 vs 367 us (4.193)
// 50-0x -> 0.999 (112640) | 17146 vs 3399 us (5.044)
// 51-0x -> 0.890 (33792) | 4154 vs 1482 us (2.803)
// 61-0x -> 0.996 (229680) | 36364 vs 8225 us (4.421)
// 62-0x -> 0.955 (68904) | 9691 vs 2941 us (3.295)
// 73-0x -> 0.982 (38412) | 5603 vs 1525 us (3.674)
// 81-0x -> 0.992 (131040) | 19927 vs 4811 us (4.142)
// 82-0x -> 0.786 (49140) | 5534 vs 3044 us (1.818)
// 89-0x -> 0.999 (667800) | 137581 vs 27786 us (4.951)
// 90-0x -> 0.989 (381600) | 63847 vs 16757 us (3.810)
// 91-0x -> 0.892 (143100) | 19233 vs 8284 us (2.322)
// 98-0x -> 0.998 (710220) | 149224 vs 33134 us (4.504)
// 99-0x -> 0.993 (405840) | 69654 vs 19636 us (3.547)
// 100-0x -> 0.942 (152190) | 22446 vs 8742 us (2.568)
// 108-0x -> 0.999 (158400) | 26240 vs 7689 us (3.413)
// 109-0x -> 0.968 (59400) | 9063 vs 3308 us (2.740)
// 115-0x -> 0.973 (73920) | 10601 vs 3349 us (3.165)
// 116-0x -> 0.641 (36960) | 3245 vs 3181 us (1.020) !!!
// 121-0x -> 0.998 (327600) | 55342 vs 16942 us (3.267)
// 122-0x -> 0.969 (262080) | 39966 vs 14853 us (2.691)
// 123-0x -> 0.801 (131040) | 15959 vs 10343 us (1.543)
// 127-0x -> 0.998 (405888) | 72385 vs 24294 us (2.980)
// 128-0x -> 0.994 (507360) | 90921 vs 30098 us (3.021)
// 129-0x -> 0.976 (405888) | 67859 vs 25681 us (2.642)
// 130-0x -> 0.865 (202944) | 28119 vs 16330 us (1.722)
// 134-0x -> 0.999 (299264) | 54399 vs 19285 us (2.821)
// 135-0x -> 0.997 (374080) | 67157 vs 24329 us (2.760)
// 136-0x -> 0.990 (299264) | 51483 vs 20071 us (2.565)
// 137-0x -> 0.909 (149632) | 22062 vs 12102 us (1.823)
// 143-0x -> 0.998 (91392) | 14670 vs 6050 us (2.425)
// 144-0x -> 0.956 (45696) | 7011 vs 3441 us (2.037)
// 148-0x -> 0.933 (18960) | 2362 vs 1053 us (2.243)
// 149-0x -> 0.554 (14220) | 975 vs 1422 us (0.686) !!!
// 152-0x -> 0.988 (51660) | 7476 vs 3708 us (2.016)
// 153-0x -> 0.888 (68880) | 9009 vs 5698 us (1.581)
// 154-0x -> 0.558 (51660) | 4253 vs 6546 us (0.650) !!!
// 155-0x -> 0.989 (7120) | 945 vs 708 us (1.335)
// 156-0x -> 0.989 (42720) | 6524 vs 3593 us (1.816)
// 157-0x -> 0.978 (106800) | 16333 vs 8605 us (1.898)
// 158-0x -> 0.930 (142400) | 20658 vs 12092 us (1.708)
// 159-0x -> 0.762 (106800) | 12675 vs 11828 us (1.072) !!!
// 161-0x -> 0.977 (43704) | 6775 vs 4351 us (1.557)
// 162-0x -> 0.960 (109260) | 17078 vs 10237 us (1.668)
// 163-0x -> 0.928 (145680) | 21689 vs 14229 us (1.524)
// 164-0x -> 0.744 (109260) | 13018 vs 13974 us (0.932) !!!
// 167-0x -> 0.997 (65880) | 10657 vs 6014 us (1.772)
// 168-0x -> 0.990 (87840) | 14110 vs 7916 us (1.782)
// 181-0x -> 0.584 (3084) | 200 vs 615 us (0.325) !!!
// 184-0x -> 0.350 (4288) | 167 vs 935 us (0.179) !!!
// 185-0x -> 0.313 (17152) | 723 vs 3639 us (0.199) !!!
// 187-0x -> 0.933 (3196) | 378 vs 543 us (0.696) !!!
// 188-0x -> 0.342 (12784) | 586 vs 3145 us (0.186) !!!
// raw -> ~2170ms
// perf-a -> ~1315ms

Loading…
Cancel
Save