Browse Source

perf: optimal load factor

master
Dnomd343 3 weeks ago
parent
commit
df76854ab0
  1. 7
      src/core/benchmark/fast_cal.cc
  2. 6
      src/core/fast_cal/internal/demo.cc
  3. 2
      src/core/group/group.h
  4. 45
      src/core/group/internal/extend.cc
  5. 71
      src/core/group/internal/load_factor.py
  6. 21365
      src/core/group/internal/load_factor.txt
  7. 45
      src/core/main.cc

7
src/core/benchmark/fast_cal.cc

@ -4,6 +4,8 @@
#include "raw_code/raw_code.h"
#include "group/group.h"
using klotski::codec::CommonCode;
static void FastCalBenchmark(benchmark::State &state) {
@ -11,9 +13,10 @@ static void FastCalBenchmark(benchmark::State &state) {
auto code = CommonCode::unsafe_create(0x1A9BF0C00).to_raw_code();
for (auto _ : state) {
auto fc = FastCal(code);
benchmark::DoNotOptimize(fc.demo());
// auto fc = FastCal(code);
// benchmark::DoNotOptimize(fc.demo());
// benchmark::DoNotOptimize(FastCal_demo(code));
auto tmp = klotski::cases::Group_extend(code);
}
}

6
src/core/fast_cal/internal/demo.cc

@ -68,7 +68,7 @@ public:
explicit FCDemo(RawCode raw_code) : codes_(GroupUnion::from_raw_code(raw_code).max_group_size()) {
auto reserve = GroupUnion::from_raw_code(raw_code).max_group_size();
// codes_.reserve(reserve);
cases_.reserve(reserve);
cases_.reserve(286730);
codes_.emplace_back(raw_code.unwrap());
cases_.emplace(raw_code, data_t {0, 0}); // without mask
}
@ -99,11 +99,15 @@ public:
codes_.pop();
if (result != 0) {
// std::cout << cases_.size() << std::endl;
// std::cout << cases_.load_factor() << std::endl;
return RawCode::unsafe_create(result);
}
}
std::cout << cases_.load_factor() << std::endl;
return RawCode::unsafe_create(0);
// while (offset_ != codes_.size()) {

2
src/core/group/group.h

@ -266,6 +266,8 @@ private:
/// Spawn all the unsorted codes of the current group.
std::vector<codec::RawCode> Group_extend(codec::RawCode raw_code, uint32_t reserve = 0);
/// Traverse the group of `raw_code` with the case hash map reserved to `coff` times
/// the union's max group size, and return the map's load factor after the traversal.
double Group_load_factor(codec::RawCode raw_code, double coff);
class GroupCases {
public:
// ------------------------------------------------------------------------------------- //

45
src/core/group/internal/extend.cc

@ -1,5 +1,7 @@
#include <absl/container/flat_hash_map.h>
#include <parallel_hashmap/phmap.h>
#include "mover/mover.h"
#include "group/group.h"
@ -13,10 +15,12 @@ using klotski::cases::GroupUnion;
std::vector<RawCode> klotski::cases::Group_extend(RawCode raw_code, uint32_t reserve) {
std::vector<RawCode> codes;
absl::flat_hash_map<uint64_t, uint64_t> cases; // <code, mask>
reserve = reserve ? reserve : GroupUnion::from_raw_code(raw_code).max_group_size();
codes.reserve(reserve);
cases.reserve(reserve);
// absl::flat_hash_map<uint64_t, uint64_t> cases; // <code, mask>
phmap::flat_hash_map<uint64_t, uint64_t> cases; // <code, mask>
// reserve = reserve ? reserve : GroupUnion::from_raw_code(raw_code).max_group_size();
// reserve = 25955;
codes.reserve(GroupUnion::from_raw_code(raw_code).max_group_size());
cases.reserve(25955 * 1.56);
auto core = MaskMover([&codes, &cases](uint64_t code, uint64_t mask) {
if (const auto match = cases.find(code); match != cases.end()) {
@ -34,9 +38,42 @@ std::vector<RawCode> klotski::cases::Group_extend(RawCode raw_code, uint32_t res
auto curr = codes[offset++].unwrap();
core.next_cases(curr, cases.find(curr)->second);
}
// std::cout << cases.size() << std::endl;
// std::cout << cases.load_factor() << std::endl;
return codes;
}
/// Measure the hash map load factor reached while spawning the group of `raw_code`.
///
/// The traversal keeps a work list of discovered codes and a `<code, mask>` map of
/// visited cases. The map is reserved to `coff * max_group_size` entries up front,
/// so the returned value shows how full the table ends up for that reserve factor.
///
/// @param raw_code seed case of the group to traverse
/// @param coff     multiplier applied to the union's max group size for the reserve
/// @return `load_factor()` of the visited map once the work list is exhausted
double klotski::cases::Group_load_factor(RawCode raw_code, double coff) {
    const auto max_size = GroupUnion::from_raw_code(raw_code).max_group_size();

    std::vector<RawCode> pending;
    pending.reserve(max_size);

    phmap::flat_hash_map<uint64_t, uint64_t> visited; // <code, mask>
    visited.reserve(static_cast<size_t>(coff * max_size));

    // Callback handed to the mover: merge the mask of an already known case,
    // otherwise register the case and queue it for expansion.
    auto mover = MaskMover([&pending, &visited](uint64_t code, uint64_t mask) {
        const auto found = visited.find(code);
        if (found == visited.end()) {
            visited.emplace(code, mask);
            pending.emplace_back(RawCode::unsafe_create(code)); // new case
            return;
        }
        found->second |= mask; // update mask
    });

    pending.emplace_back(raw_code);
    visited.emplace(raw_code, 0); // without mask

    // `pending` grows while it is being consumed, so compare against the live size.
    for (uint64_t index = 0; index != pending.size();) {
        const auto current = pending[index++].unwrap();
        mover.next_cases(current, visited.find(current)->second);
    }

    // if (visited.size() != max_size) {
    //     std::cout << "reserve size error" << std::endl;
    //     std::abort();
    // }
    return visited.load_factor();
}
//RangesUnion Group::cases() const {
//
// // TODO: add white list for single-group unions

71
src/core/group/internal/load_factor.py

@ -0,0 +1,71 @@
#!/usr/bin/env python3
import re
# (type_id, pattern_id): (load_factor_a, coff, load_factor_b)
type data_type = dict[tuple[int, int], tuple[float, float, float]]
def _store_record(result: dict, key: tuple, items: list) -> None:
    """Validate one finished record and add it to `result` unless it is a low-load case."""
    assert len(key) == 2
    assert items[0][0] == 1.0  # the first sample is always taken at coff = 1.00
    if len(items) == 1:
        assert items[0][1] < 0.1  # skip low cases
    else:
        assert len(items) == 2
        result[key] = items[0][1], items[1][0], items[1][1]


def load_data(lines: list[str]) -> data_type:
    """Parse the load factor dump into a mapping keyed by `(type_id, pattern_id)`.

    The input is a sequence of records, each made of a `[type_id, pattern_id]`
    header, one or two `coff, load_factor` sample lines, and a terminating blank
    line. Records with a single sample are checked to be below 0.1 and skipped.

    :param lines: content of the dump, already split into lines.
    :return: `{(type_id, pattern_id): (load_factor_a, coff, load_factor_b)}`.
    :raises ValueError: if a header or sample line does not match the format.
    """
    result = {}
    key, items = (), []
    for line in lines:
        if line.startswith('['):
            match = re.match(r'^\[(\d+), (\d+)]$', line)
            if match is None:
                raise ValueError(f'malformed group header: {line!r}')
            key = (int(match[1]), int(match[2]))
        elif not line:
            _store_record(result, key, items)
            key, items = (), []
        else:
            match = re.match(r'^(\d\.\d{2}), (\d\.\d{6})$', line)
            if match is None:
                raise ValueError(f'malformed sample line: {line!r}')
            items.append((float(match[1]), float(match[2])))
    if key or items:
        # The input did not end with a blank line: keep the last record
        # instead of dropping it silently.
        _store_record(result, key, items)
    return result
def analyse_data(data: data_type) -> None:
    """Print a summary of the groups whose load factor at coff 1.0 is at least 0.5.

    First prints the sorted set of `load_factor_a / load_factor_b` ratios, truncated
    to three decimals. Then prints every group that has `load_factor_a > 0.55` and
    `coff > 1.3`, ordered by `coff`.
    """
    data = {group: record for group, record in data.items() if record[0] >= 0.5}

    ratios = {int(before * 1000 / after) / 1000 for before, _, after in data.values()}
    print(sorted(ratios))

    type_a, type_b, type_c = [], [], []
    for group, (load_factor, coff, _) in data.items():
        if load_factor <= 0.55:
            bucket = type_a
        elif coff <= 1.3:
            bucket = type_b
        else:
            bucket = type_c
        bucket.append((group, load_factor, coff))

    for entry in sorted(type_c, key=lambda record: record[2]):
        print(entry)
# ((117, 0), 0.571359, 1.54) -> 4680
# ((118, 0), 0.571298, 1.54) -> 37440
# ((133, 0), 0.570803, 1.54) -> 149632
# ((134, 0), 0.570558, 1.54) -> 299136
# ((63, 0), 0.568915, 1.55) -> 582
# ((136, 0), 0.565568, 1.55) -> 296520
# ((112, 0), 0.563973, 1.56) -> 36960
# ((113, 0), 0.563969, 1.56) -> 73920
# ((197, 0), 0.714286, 1.6) -> 5
# ((197, 1), 0.714286, 1.6) -> 5
# ((197, 2), 0.714286, 1.6) -> 5
# ((197, 3), 0.714286, 1.6) -> 5
# ((197, 4), 0.714286, 1.6) -> 5
# ((197, 5), 0.714286, 1.6) -> 5
if __name__ == '__main__':
    # Read the dump from the current working directory; `with` closes the
    # file handle as soon as the content has been read.
    with open('load_factor.txt') as fp:
        raw = fp.read().splitlines()
    analyse_data(load_data(raw))

21365
src/core/group/internal/load_factor.txt

File diff suppressed because it is too large

45
src/core/main.cc

@ -34,6 +34,10 @@ using klotski::cases::TYPE_ID_LIMIT;
using klotski::cases::ALL_CASES_NUM_;
using klotski::codec::SHORT_CODE_LIMIT;
using klotski::cases::PATTERN_DATA;
using klotski::cases::PATTERN_OFFSET;
using klotski::cases::Group_load_factor;
int main() {
// const auto start = clock();
@ -42,15 +46,48 @@ int main() {
const auto start = std::chrono::system_clock::now();
auto code = CommonCode::unsafe_create(0x1A9BF0C00).to_raw_code();
for (int i = 0; i < 100; ++i) {
// auto code = CommonCode::unsafe_create(0x1A9BF0C00).to_raw_code();
// for (int i = 0; i < 100; ++i) {
// FastCal fc {code};
// fc.solve();
// klotski::cases::Group_extend(code);
FastCal_demo(code);
break;
// FastCal_demo(code);
// break;
// }
for (uint32_t type_id = 0; type_id < TYPE_ID_LIMIT; ++type_id) {
auto group_union = GroupUnion::unsafe_create(type_id);
for (uint32_t pattern_id = 0; pattern_id < group_union.pattern_num(); ++pattern_id) {
std::cout << std::format("[{}, {}]\n", type_id, pattern_id);
auto seed = CommonCode::unsafe_create(PATTERN_DATA[PATTERN_OFFSET[type_id] + pattern_id] >> 23);
double coff = 1.0;
double last_val = -1;
while (true) {
auto val = Group_load_factor(seed.to_raw_code(), coff);
if (int(val * 1000) != int(last_val * 1000)) {
std::cout << std::format("{:.2f}, {:.6f}\n", coff, val);
last_val = val;
}
if (coff >= 2.0) {
break;
}
coff += 0.01;
}
std::cout << std::endl;
}
}
// std::cout << Group_load_factor(code, 0.5) << std::endl;
// std::cout << Group_load_factor(code, 0.8) << std::endl;
// std::cout << Group_load_factor(code, 1.0) << std::endl;
// std::cout << Group_load_factor(code, 1.2) << std::endl;
// std::cout << Group_load_factor(code, 1.5) << std::endl;
// std::cout << Group_load_factor(code, 2.0) << std::endl;
// std::cout << Group_load_factor(code, 3.0) << std::endl;
// std::cout << Group_load_factor(code, 5.0) << std::endl;
// for (int i = 0; i < 10000000; ++i) {
// MaskMover mover([](uint64_t code, uint64_t mask) {
// volatile auto tmp_1 = code;

Loading…
Cancel
Save