Browse Source

perf: optimize string encoding of ShortCode

legacy
Dnomd343 7 months ago
parent
commit
ca63237dfd
  1. 135
      src/core/benchmark/codec.cc
  2. 16
      src/core/short_code/internal/serialize.cc
  3. 20
      src/core/short_code/internal/serialize_chars.h
  4. 2
      src/core/short_code/short_code.h

135
src/core/benchmark/codec.cc

@ -48,59 +48,28 @@ std::vector<uint64_t> common_code_samples(uint64_t num) {
}
// std::vector<uint64_t> select_codes(uint64_t num) {
// auto codes = all_common_codes();
//
// // return {codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411]};
// // std::array<uint64_t, 140> samples = {
// std::vector<uint64_t> samples = {
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
//
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
//
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
//
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// };
//
// return {samples.begin(), samples.begin() + num};
// }
/// Pick `num` short codes spread evenly below `SHORT_CODE_LIMIT`.
///
/// The value range is split into `num` equally sized parts and the midpoint
/// of every part is taken, so the result is deterministic for a given `num`.
///
/// @param num number of samples requested
/// @return one sample per part; empty when `num` is 0
std::vector<uint32_t> short_code_samples(uint64_t num) {
    if (num == 0) {
        return {}; // nothing to sample, and avoids the division by zero below
    }

    const auto part_size = static_cast<uint32_t>(klotski::codec::SHORT_CODE_LIMIT / num);
    const uint32_t offset = part_size / 2; // midpoint inside each part

    std::vector<uint32_t> result;
    result.reserve(num);
    // 64-bit counter: a 32-bit one could never reach a bound above UINT32_MAX.
    for (uint64_t i = 0; i < num; ++i) {
        result.emplace_back(static_cast<uint32_t>(i * part_size + offset));
    }
    return result;
}
std::vector<std::string> str_common_codes(uint64_t num, bool shorten) {
// auto src = select_codes(num);
@ -116,6 +85,20 @@ std::vector<std::string> str_common_codes(uint64_t num, bool shorten) {
return codes;
}
/// Produce the string form of `num` sampled short codes.
///
/// The raw values come from short_code_samples(); each one is wrapped with
/// ShortCode::unsafe_create() and rendered through to_string().
///
/// @param num number of samples requested
/// @return the rendered strings, in the same order as the samples
std::vector<std::string> str_short_codes(uint64_t num) {
    const auto samples = short_code_samples(num);

    std::vector<std::string> encoded;
    encoded.reserve(samples.size());
    for (const auto raw : samples) {
        encoded.emplace_back(
            klotski::codec::ShortCode::unsafe_create(raw).to_string());
    }
    return encoded;
}
static void CommonCodeSerialize(benchmark::State &state) {
// common_code_samples(8);
@ -174,10 +157,48 @@ static void CommonCodeDeserializeShorten(benchmark::State &state) {
}
BENCHMARK(CommonCodeSerialize)->Range(8, 256);
BENCHMARK(CommonCodeDeserialize)->Range(8, 256);
BENCHMARK(CommonCodeSerializeShorten)->Range(8, 256);
BENCHMARK(CommonCodeDeserializeShorten)->Range(8, 256);
/// Benchmark `ShortCode::string_encode`.
///
/// `state.range(0)` is the number of sampled short codes encoded per
/// benchmark iteration; throughput is reported as items processed.
static void ShortCodeSerialize(benchmark::State &state) {
    const std::vector<uint32_t> samples = short_code_samples(state.range(0));

    for (auto _ : state) {
        for (const auto code : samples) {
            auto ret = klotski::codec::ShortCode::string_encode(code);
            // Documented optimisation barrier; a `volatile` class-type local
            // does not reliably keep the call from being optimised away.
            benchmark::DoNotOptimize(ret);
        }
    }

    state.SetItemsProcessed(state.iterations() * state.range(0));
}
/// Benchmark `ShortCode::string_decode`.
///
/// `state.range(0)` is the number of pre-rendered short code strings decoded
/// per benchmark iteration; throughput is reported as items processed.
static void ShortCodeDeserialize(benchmark::State &state) {
    // `tmp` owns the character data; `samples` only holds views into it, so
    // `tmp` must stay alive for the whole measurement.
    const auto tmp = str_short_codes(state.range(0));
    const std::vector<std::string_view> samples {tmp.begin(), tmp.end()};

    for (auto _ : state) {
        for (const auto code : samples) {
            auto ret = klotski::codec::ShortCode::string_decode(code);
            // Documented optimisation barrier; a `volatile` class-type local
            // does not reliably keep the call from being optimised away.
            benchmark::DoNotOptimize(ret);
        }
    }

    state.SetItemsProcessed(state.iterations() * state.range(0));
}
// Commented-out registrations of the CommonCode benchmarks.
// BENCHMARK(CommonCodeSerialize)->Range(8, 256);
// BENCHMARK(CommonCodeDeserialize)->Range(8, 256);
// BENCHMARK(CommonCodeSerializeShorten)->Range(8, 256);
// BENCHMARK(CommonCodeDeserializeShorten)->Range(8, 256);
// Register the ShortCode string codec benchmarks. The Range argument reaches
// each benchmark as `state.range(0)`, which they use as the sample count.
BENCHMARK(ShortCodeSerialize)->Range(8, 256);
BENCHMARK(ShortCodeDeserialize)->Range(8, 256);
// static void CommonCodeDecode(benchmark::State &state) {
// const auto tmp = str_common_codes(state.range(0));

16
src/core/short_code/internal/serialize.cc

@ -4,28 +4,24 @@
using klotski::codec::ShortCode;
/// Serialize a short code into its 5-character string form.
///
/// The value is consumed 5 bits at a time starting from the least significant
/// group; each group is mapped through SHORT_CODE_TABLE and stored from the
/// last character backwards, so the most significant group ends up first.
///
/// NOTE(review): this listing shows two variants side by side (`char result[6]`
/// with a terminator and `return result`, next to `char result[5]` with
/// `return {result, result + 5}`) — presumably the pre- and post-change lines
/// of the commit; confirm which set is current against the real file.
std::string ShortCode::string_encode(uint32_t short_code) {
char result[6];
result[5] = '\0'; // string ending flag
char result[5];
// Fill the buffer back to front, one 5-bit group per character.
for (int n = 0; n < 5; ++n) {
result[4 - n] = SHORT_CODE_TABLE[short_code & 0b11111];
short_code >>= 5;
}
return result;
// Iterator-pair form: builds the string from exactly 5 chars, no terminator needed.
return {result, result + 5};
}
std::optional<uint32_t> ShortCode::string_decode(const std::string &short_code) {
std::optional<uint32_t> ShortCode::string_decode(const std::string_view short_code) {
if (short_code.length() != 5) {
return std::nullopt; // invalid string length
}
uint64_t result = 0;
uint32_t result = 0;
for (auto bit : short_code) {
result <<= 5;
if (bit >= 'a' && bit <= 'z') {
bit -= 32; // convert to uppercase
}
if (bit < '1' || bit > 'Z') { // invalid characters
if (bit < '1' || bit > 'z') { // invalid characters
return std::nullopt;
}
result <<= 5;
result += (bit = SHORT_CODE_TABLE_REV[bit - 49]); // table convert
if (bit == -1) { // invalid character
return std::nullopt;

20
src/core/short_code/internal/serialize_chars.h

@ -15,11 +15,9 @@
/// | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` | ///
/// ------------------------------------------------- ///
#include <cstdint>
namespace klotski::codec {
constexpr int8_t SHORT_CODE_TABLE[32] {
constexpr char SHORT_CODE_TABLE[32] {
'1', '2', '3', '4', '5', '6', '7', '8', '9', // skip `0`
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // skip `I`
'J', 'K', // skip `L`
@ -27,12 +25,16 @@ constexpr int8_t SHORT_CODE_TABLE[32] {
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
};
/// Reverse lookup for SHORT_CODE_TABLE, indexed by `ch - 49` (49 is `1`).
/// Each entry is the 5-bit value of the character, or -1 when the character
/// is not part of the alphabet.
///
/// NOTE(review): two declarations appear in this listing (a 42-entry `int8_t`
/// table covering `1`..`Z`, and a 74-entry `char` table that also covers
/// `a`..`z`) — presumably the pre- and post-change lines of the commit;
/// confirm which one is current against the real file.
/// NOTE(review): the signedness of plain `char` is implementation-defined, so
/// the -1 sentinels in the `char` variant rely on `char` being signed —
/// consider keeping `int8_t`; TODO confirm on unsigned-char targets.
constexpr int8_t SHORT_CODE_TABLE_REV[42] {
0, 1, 2, 3, 4, 5, 6, 7, 8, // `1`(49) ~ `9`(57)
-1, -1, -1, -1, -1, -1, -1, // `:`(58) ~ `@`(64)
9, 10, 11, 12, 13, 14, 15, 16, -1, 17, // `A`(65) ~ `J`(74)
18, -1, 19, 20, -1, 21, 22, 23, 24, 25, // `K`(75) ~ `T`(84)
26, 27, 28, 29, 30, 31, // `U`(85) ~ `Z`(90)
constexpr char SHORT_CODE_TABLE_REV[74] {
0, 1, 2, 3, 4, 5, 6, 7, 8, // `1` ~ `9` | [49, 57]
-1, -1, -1, -1, -1, -1, -1, // | [58, 64]
9, 10, 11, 12, 13, 14, 15, 16, -1, 17, // `A` ~ `J` | [65, 74]
18, -1, 19, 20, -1, 21, 22, 23, 24, 25, // `K` ~ `T` | [75, 84]
26, 27, 28, 29, 30, 31, // `U` ~ `Z` | [85, 90]
-1, -1, -1, -1, -1, -1, // | [91, 96]
9, 10, 11, 12, 13, 14, 15, 16, -1, 17, // `a` ~ `j` | [97, 106]
18, -1, 19, 20, -1, 21, 22, 23, 24, 25, // `k` ~ `t` | [107, 116]
26, 27, 28, 29, 30, 31, // `u` ~ `z` | [117, 122]
};
} // namespace klotski::codec

2
src/core/short_code/short_code.h

@ -174,7 +174,7 @@ private:
static std::string string_encode(uint32_t short_code);
/// Deserialize ShortCode from string and return nullopt on error.
static std::optional<uint32_t> string_decode(const std::string &short_code);
static std::optional<uint32_t> string_decode(std::string_view short_code);
// ------------------------------------------------------------------------------------- //
};

Loading…
Cancel
Save