Browse Source

perf: optimize string encoding of ShortCode

legacy
Dnomd343 7 months ago
parent
commit
ca63237dfd
  1. 135
      src/core/benchmark/codec.cc
  2. 16
      src/core/short_code/internal/serialize.cc
  3. 20
      src/core/short_code/internal/serialize_chars.h
  4. 2
      src/core/short_code/short_code.h

135
src/core/benchmark/codec.cc

@ -48,59 +48,28 @@ std::vector<uint64_t> common_code_samples(uint64_t num) {
} }
// std::vector<uint64_t> select_codes(uint64_t num) { std::vector<uint32_t> short_code_samples(uint64_t num) {
// auto codes = all_common_codes();
// uint32_t part_size = klotski::codec::SHORT_CODE_LIMIT / num;
// // return {codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411]};
// // std::array<uint64_t, 140> samples = { // uint64_t offset = 0;
// std::vector<uint64_t> samples = { uint32_t offset = part_size / 2;
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], std::vector<uint32_t> result;
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], for (uint32_t i = 0; i < num; ++i) {
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], uint32_t index = i * part_size + offset;
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], // // std::cout << "index = " << index << std::endl;
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], // uint64_t kk[] {343, 666, 114514, 35324, 123454, 76453, 93411};
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], // uint64_t index = kk[i % 7];
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// result.emplace_back(index);
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], }
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], return result;
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411], }
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
//
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
//
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// codes[343], codes[666], codes[114514], codes[35324], codes[123454], codes[76453], codes[93411],
// };
//
// return {samples.begin(), samples.begin() + num};
// }
std::vector<std::string> str_common_codes(uint64_t num, bool shorten) { std::vector<std::string> str_common_codes(uint64_t num, bool shorten) {
// auto src = select_codes(num); // auto src = select_codes(num);
@ -116,6 +85,20 @@ std::vector<std::string> str_common_codes(uint64_t num, bool shorten) {
return codes; return codes;
} }
/// Produce the string form of `num` sampled short codes.
///
/// The raw values are taken from `short_code_samples(num)`; each one is wrapped with
/// `ShortCode::unsafe_create` and rendered through `to_string()`. The output keeps the
/// same order as the samples.
std::vector<std::string> str_short_codes(uint64_t num) {
    auto raw_samples = short_code_samples(num);

    std::vector<std::string> encoded;
    encoded.reserve(raw_samples.size()); // size is known up front, so allocate once

    for (auto it = raw_samples.begin(); it != raw_samples.end(); ++it) {
        encoded.emplace_back(klotski::codec::ShortCode::unsafe_create(*it).to_string());
    }
    return encoded;
}
static void CommonCodeSerialize(benchmark::State &state) { static void CommonCodeSerialize(benchmark::State &state) {
// common_code_samples(8); // common_code_samples(8);
@ -174,10 +157,48 @@ static void CommonCodeDeserializeShorten(benchmark::State &state) {
} }
BENCHMARK(CommonCodeSerialize)->Range(8, 256); static void ShortCodeSerialize(benchmark::State &state) {
BENCHMARK(CommonCodeDeserialize)->Range(8, 256); std::vector<uint32_t> samples = short_code_samples(state.range(0));
BENCHMARK(CommonCodeSerializeShorten)->Range(8, 256);
BENCHMARK(CommonCodeDeserializeShorten)->Range(8, 256); for (auto _ : state) {
for (const auto code : samples) {
volatile auto ret = klotski::codec::ShortCode::string_encode(code);
}
}
state.SetItemsProcessed(state.iterations() * state.range(0));
}
/// Benchmark: decode a batch of short-code strings via `ShortCode::string_decode`.
///
/// `state.range(0)` selects the batch size. The strings are generated once, outside the
/// timed loop, and passed to the decoder as `std::string_view`s borrowing from `owned`.
static void ShortCodeDeserialize(benchmark::State &state) {
    const auto batch_size = state.range(0);

    const auto owned = str_short_codes(batch_size); // backing storage for `views`
    const std::vector<std::string_view> views {owned.begin(), owned.end()};

    for (auto _ : state) {
        for (const auto text : views) {
            // stored into a volatile local, presumably so the call is not optimized away
            volatile auto ret = klotski::codec::ShortCode::string_decode(text);
        }
    }

    // report throughput per decoded string rather than per outer iteration
    state.SetItemsProcessed(state.iterations() * batch_size);
}
// BENCHMARK(CommonCodeSerialize)->Range(8, 256);
// BENCHMARK(CommonCodeDeserialize)->Range(8, 256);
// BENCHMARK(CommonCodeSerializeShorten)->Range(8, 256);
// BENCHMARK(CommonCodeDeserializeShorten)->Range(8, 256);
BENCHMARK(ShortCodeSerialize)->Range(8, 256);
BENCHMARK(ShortCodeDeserialize)->Range(8, 256);
// static void CommonCodeDecode(benchmark::State &state) { // static void CommonCodeDecode(benchmark::State &state) {
// const auto tmp = str_common_codes(state.range(0)); // const auto tmp = str_common_codes(state.range(0));

16
src/core/short_code/internal/serialize.cc

@ -4,28 +4,24 @@
using klotski::codec::ShortCode; using klotski::codec::ShortCode;
std::string ShortCode::string_encode(uint32_t short_code) { std::string ShortCode::string_encode(uint32_t short_code) {
char result[6]; char result[5];
result[5] = '\0'; // string ending flag
for (int n = 0; n < 5; ++n) { for (int n = 0; n < 5; ++n) {
result[4 - n] = SHORT_CODE_TABLE[short_code & 0b11111]; result[4 - n] = SHORT_CODE_TABLE[short_code & 0b11111];
short_code >>= 5; short_code >>= 5;
} }
return result; return {result, result + 5};
} }
std::optional<uint32_t> ShortCode::string_decode(const std::string &short_code) { std::optional<uint32_t> ShortCode::string_decode(const std::string_view short_code) {
if (short_code.length() != 5) { if (short_code.length() != 5) {
return std::nullopt; // invalid string length return std::nullopt; // invalid string length
} }
uint64_t result = 0; uint32_t result = 0;
for (auto bit : short_code) { for (auto bit : short_code) {
result <<= 5; if (bit < '1' || bit > 'z') { // invalid characters
if (bit >= 'a' && bit <= 'z') {
bit -= 32; // convert to uppercase
}
if (bit < '1' || bit > 'Z') { // invalid characters
return std::nullopt; return std::nullopt;
} }
result <<= 5;
result += (bit = SHORT_CODE_TABLE_REV[bit - 49]); // table convert result += (bit = SHORT_CODE_TABLE_REV[bit - 49]); // table convert
if (bit == -1) { // invalid character if (bit == -1) { // invalid character
return std::nullopt; return std::nullopt;

20
src/core/short_code/internal/serialize_chars.h

@ -15,11 +15,9 @@
/// | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` | /// /// | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` | ///
/// ------------------------------------------------- /// /// ------------------------------------------------- ///
#include <cstdint>
namespace klotski::codec { namespace klotski::codec {
constexpr int8_t SHORT_CODE_TABLE[32] { constexpr char SHORT_CODE_TABLE[32] {
'1', '2', '3', '4', '5', '6', '7', '8', '9', // skip `0` '1', '2', '3', '4', '5', '6', '7', '8', '9', // skip `0`
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // skip `I` 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // skip `I`
'J', 'K', // skip `L` 'J', 'K', // skip `L`
@ -27,12 +25,16 @@ constexpr int8_t SHORT_CODE_TABLE[32] {
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
}; };
constexpr int8_t SHORT_CODE_TABLE_REV[42] { constexpr char SHORT_CODE_TABLE_REV[74] {
0, 1, 2, 3, 4, 5, 6, 7, 8, // `1`(49) ~ `9`(57) 0, 1, 2, 3, 4, 5, 6, 7, 8, // `1` ~ `9` | [49, 57]
-1, -1, -1, -1, -1, -1, -1, // `:`(58) ~ `@`(64) -1, -1, -1, -1, -1, -1, -1, // | [58, 64]
9, 10, 11, 12, 13, 14, 15, 16, -1, 17, // `A`(65) ~ `J`(74) 9, 10, 11, 12, 13, 14, 15, 16, -1, 17, // `A` ~ `J` | [65, 74]
18, -1, 19, 20, -1, 21, 22, 23, 24, 25, // `K`(75) ~ `T`(84) 18, -1, 19, 20, -1, 21, 22, 23, 24, 25, // `K` ~ `T` | [75, 84]
26, 27, 28, 29, 30, 31, // `U`(85) ~ `Z`(90) 26, 27, 28, 29, 30, 31, // `U` ~ `Z` | [85, 90]
-1, -1, -1, -1, -1, -1, // | [91, 96]
9, 10, 11, 12, 13, 14, 15, 16, -1, 17, // `a` ~ `j` | [97, 106]
18, -1, 19, 20, -1, 21, 22, 23, 24, 25, // `k` ~ `t` | [107, 116]
26, 27, 28, 29, 30, 31, // `u` ~ `z` | [117, 122]
}; };
} // namespace klotski::codec } // namespace klotski::codec

2
src/core/short_code/short_code.h

@ -174,7 +174,7 @@ private:
static std::string string_encode(uint32_t short_code); static std::string string_encode(uint32_t short_code);
/// Deserialize ShortCode from string and return nullopt on error. /// Deserialize ShortCode from string and return nullopt on error.
static std::optional<uint32_t> string_decode(const std::string &short_code); static std::optional<uint32_t> string_decode(std::string_view short_code);
// ------------------------------------------------------------------------------------- // // ------------------------------------------------------------------------------------- //
}; };

Loading…
Cancel
Save