Browse Source

update: several improvements of ShortCode

legacy
Dnomd343 6 months ago
parent
commit
f160f58e90
  1. 18
      src/core/benchmark/codec.cc
  2. 6
      src/core/short_code/internal/offset/offset.h
  3. 30
      src/core/short_code/internal/serialize.cc
  4. 23
      src/core/short_code/internal/serialize_chars.h
  5. 8
      src/core/short_code/internal/short_code.inl
  6. 11
      src/core/short_code/short_code.h

18
src/core/benchmark/codec.cc

@ -166,7 +166,7 @@ static void ShortCodeSerialize(benchmark::State &state) {
for (auto _ : state) { for (auto _ : state) {
for (const auto code : samples) { for (const auto code : samples) {
volatile auto ret = klotski::codec::ShortCode::string_encode(code); volatile auto ret = ShortCode::string_encode(code);
} }
} }
@ -185,7 +185,7 @@ static void ShortCodeDeserialize(benchmark::State &state) {
for (const auto code : samples) { for (const auto code : samples) {
volatile auto ret = klotski::codec::ShortCode::string_decode(code); volatile auto ret = ShortCode::string_decode(code);
} }
@ -197,8 +197,8 @@ static void ShortCodeDeserialize(benchmark::State &state) {
static void ShortCodeToCommonCode(benchmark::State &state) { static void ShortCodeToCommonCode(benchmark::State &state) {
ShortCode::speed_up(true); // ShortCode::speed_up(true);
// ShortCode::speed_up(false); ShortCode::speed_up(false);
// ShortCode::fast_decode(4091296); // ShortCode::fast_decode(4091296);
@ -221,8 +221,8 @@ static void ShortCodeToCommonCode(benchmark::State &state) {
} }
static void CommonCodeToShortCode(benchmark::State &state) { static void CommonCodeToShortCode(benchmark::State &state) {
ShortCode::speed_up(true); // ShortCode::speed_up(true);
// ShortCode::speed_up(false); ShortCode::speed_up(false);
auto common_code = CommonCode::unsafe_create(0x1A9BF0C00); auto common_code = CommonCode::unsafe_create(0x1A9BF0C00);
@ -248,11 +248,11 @@ static void CommonCodeToShortCode(benchmark::State &state) {
// BENCHMARK(CommonCodeSerializeShorten)->Range(8, 256); // BENCHMARK(CommonCodeSerializeShorten)->Range(8, 256);
// BENCHMARK(CommonCodeDeserializeShorten)->Range(8, 256); // BENCHMARK(CommonCodeDeserializeShorten)->Range(8, 256);
// BENCHMARK(ShortCodeSerialize)->Range(8, 256); BENCHMARK(ShortCodeSerialize)->Range(8, 256);
// BENCHMARK(ShortCodeDeserialize)->Range(8, 256); BENCHMARK(ShortCodeDeserialize)->Range(8, 256);
// BENCHMARK(ShortCodeToCommonCode); // BENCHMARK(ShortCodeToCommonCode);
BENCHMARK(CommonCodeToShortCode); // BENCHMARK(CommonCodeToShortCode);
// static void CommonCodeDecode(benchmark::State &state) { // static void CommonCodeDecode(benchmark::State &state) {
// const auto tmp = str_common_codes(state.range(0)); // const auto tmp = str_common_codes(state.range(0));

6
src/core/short_code/internal/offset/offset.h

@ -6,9 +6,9 @@
namespace klotski::codec { namespace klotski::codec {
/// This is the index for basic ranges, and its position (0 ~ 7311884) in all /// This is the offset index of basic ranges, and its position (0 ~ 7311884) in
/// basic ranges is located according to the first 12-bit (0 ~ 4095) within the /// all basic ranges is located according to the first 12-bit (0 ~ 4095) within
/// 32-bit `range`. /// the 32-bit `range` value.
constexpr auto RANGES_GLOBAL_OFFSET = std::to_array<uint32_t>({ constexpr auto RANGES_GLOBAL_OFFSET = std::to_array<uint32_t>({
#include "constant/offset.inc" #include "constant/offset.inc"

30
src/core/short_code/internal/serialize.cc

@ -1,34 +1,38 @@
#include <ranges>
#include "serialize_chars.h" #include "serialize_chars.h"
#include "short_code/short_code.h" #include "short_code/short_code.h"
using klotski::codec::ShortCode; using klotski::codec::ShortCode;
using klotski::cases::ALL_CASES_NUM_;
std::string ShortCode::string_encode(uint32_t short_code) { std::string ShortCode::string_encode(uint32_t short_code) {
char result[5]; KLSK_ASSUME(short_code < ALL_CASES_NUM_);
for (int n = 0; n < 5; ++n) { std::array<char, 5> arr {};
result[4 - n] = SHORT_CODE_TABLE[short_code & 0b11111]; for (auto &c : arr | std::views::reverse) {
c = SHORT_CODE_TABLE[short_code & 0b11111]; // table convert
short_code >>= 5; short_code >>= 5;
} }
return {result, result + 5}; return {arr.begin(), arr.end()};
} }
std::optional<uint32_t> ShortCode::string_decode(const std::string_view short_code) { std::optional<uint32_t> ShortCode::string_decode(const std::string_view short_code) {
if (short_code.length() != 5) { if (short_code.length() != 5) {
return std::nullopt; // invalid string length return std::nullopt; // invalid string length
} }
uint32_t result = 0; uint32_t code = 0;
for (auto bit : short_code) { for (const uint8_t bit : short_code) {
if (bit < '1' || bit > 'z') { // invalid characters if (bit > 'z') { // invalid characters
return std::nullopt; return std::nullopt;
} }
result <<= 5; if (const auto val = SHORT_CODE_TABLE_REV[bit]; val != -1) {
result += (bit = SHORT_CODE_TABLE_REV[bit - 49]); // table convert (code <<= 5) += val;
if (bit == -1) { // invalid character continue;
return std::nullopt;
} }
return std::nullopt; // invalid character
} }
if (!check(result)) { // check converted short code if (!check(code)) { // check converted short code
return std::nullopt; return std::nullopt;
} }
return result; // apply convert result return code; // apply convert result
} }

23
src/core/short_code/internal/serialize_chars.h

@ -1,5 +1,3 @@
#pragma once
/// ShortCode Convert Table /// /// ShortCode Convert Table ///
/// ------------------------------------------------- /// /// ------------------------------------------------- ///
/// | 00 | 01 | 02 | 03 | 04 | 05 | 06 | 07 | /// /// | 00 | 01 | 02 | 03 | 04 | 05 | 06 | 07 | ///
@ -15,17 +13,28 @@
/// | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` | /// /// | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` | ///
/// ------------------------------------------------- /// /// ------------------------------------------------- ///
#pragma once
#include <array>
namespace klotski::codec { namespace klotski::codec {
constexpr char SHORT_CODE_TABLE[32] { constexpr auto SHORT_CODE_TABLE = std::to_array<char>({
'1', '2', '3', '4', '5', '6', '7', '8', '9', // skip `0` '1', '2', '3', '4', '5', '6', '7', '8', '9', // skip `0`
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // skip `I` 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // skip `I`
'J', 'K', // skip `L` 'J', 'K', // skip `L`
'M', 'N', // skip `O` 'M', 'N', // skip `O`
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
}; });
static_assert(SHORT_CODE_TABLE.size() == 32);
constexpr char SHORT_CODE_TABLE_REV[74] { constexpr auto SHORT_CODE_TABLE_REV = std::to_array<char>({
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // | [0, 9]
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // | [10, 19]
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // | [20, 29]
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // | [30, 39]
-1, -1, -1, -1, -1, -1, -1, -1, -1, // | [40, 48]
0, 1, 2, 3, 4, 5, 6, 7, 8, // `1` ~ `9` | [49, 57] 0, 1, 2, 3, 4, 5, 6, 7, 8, // `1` ~ `9` | [49, 57]
-1, -1, -1, -1, -1, -1, -1, // | [58, 64] -1, -1, -1, -1, -1, -1, -1, // | [58, 64]
9, 10, 11, 12, 13, 14, 15, 16, -1, 17, // `A` ~ `J` | [65, 74] 9, 10, 11, 12, 13, 14, 15, 16, -1, 17, // `A` ~ `J` | [65, 74]
@ -35,6 +44,8 @@ constexpr char SHORT_CODE_TABLE_REV[74] {
9, 10, 11, 12, 13, 14, 15, 16, -1, 17, // `a` ~ `j` | [97, 106] 9, 10, 11, 12, 13, 14, 15, 16, -1, 17, // `a` ~ `j` | [97, 106]
18, -1, 19, 20, -1, 21, 22, 23, 24, 25, // `k` ~ `t` | [107, 116] 18, -1, 19, 20, -1, 21, 22, 23, 24, 25, // `k` ~ `t` | [107, 116]
26, 27, 28, 29, 30, 31, // `u` ~ `z` | [117, 122] 26, 27, 28, 29, 30, 31, // `u` ~ `z` | [117, 122]
}; });
static_assert(SHORT_CODE_TABLE_REV.size() == 'z' + 1);
} // namespace klotski::codec } // namespace klotski::codec

8
src/core/short_code/internal/short_code.inl

@ -1,12 +1,10 @@
#pragma once #pragma once
#include <bit>
#include "common_code/common_code.h" #include "common_code/common_code.h"
namespace klotski::codec { namespace klotski::codec {
// ------------------------------------------------------------------------------------- // // ----------------------------------------------------------------------------------------- //
inline ShortCode::ShortCode(const CommonCode common_code) { inline ShortCode::ShortCode(const CommonCode common_code) {
if (fast_) { if (fast_) {
@ -27,7 +25,7 @@ inline std::optional<ShortCode> ShortCode::create(const uint32_t short_code) {
return unsafe_create(short_code); return unsafe_create(short_code);
} }
// ------------------------------------------------------------------------------------- // // ----------------------------------------------------------------------------------------- //
inline ShortCode::operator uint32_t() const { inline ShortCode::operator uint32_t() const {
return code_; return code_;
@ -97,7 +95,7 @@ inline std::optional<ShortCode> ShortCode::from_common_code(const std::string_vi
return CommonCode::from_string(common_code).transform(convert); return CommonCode::from_string(common_code).transform(convert);
} }
// ------------------------------------------------------------------------------------- // // ----------------------------------------------------------------------------------------- //
constexpr auto operator==(const ShortCode &lhs, const uint32_t rhs) { constexpr auto operator==(const ShortCode &lhs, const uint32_t rhs) {
return lhs.code_ == rhs; return lhs.code_ == rhs;

11
src/core/short_code/short_code.h

@ -1,7 +1,7 @@
/// Klotski Engine by Dnomd343 @2024 /// Klotski Engine by Dnomd343 @2024
/// ShortCode is a high-compression encoding scheme based on CommonCode. Since /// ShortCode is a high-compression encoding scheme based on CommonCode. Since
/// there are a total of 29334498 valid klotski cases, arrange then from small /// there are a total of 29334498 valid klotski cases, arrange them from small
/// to large by their CommonCodes (36-bit positive integers), and use the index /// to large by their CommonCodes (36-bit positive integers), and use the index
/// as the ShortCode. /// as the ShortCode.
@ -160,7 +160,7 @@ private:
static KLSK_INLINE uint64_t fast_decode(uint32_t short_code); static KLSK_INLINE uint64_t fast_decode(uint32_t short_code);
/// Convert CommonCode to ShortCode based on AllCases data. /// Convert CommonCode to ShortCode based on AllCases data.
static uint32_t fast_encode(uint64_t common_code); static KLSK_INLINE uint32_t fast_encode(uint64_t common_code);
/// Convert ShortCode to CommonCode based on BasicRanges data. /// Convert ShortCode to CommonCode based on BasicRanges data.
static uint64_t tiny_decode(uint32_t short_code); static uint64_t tiny_decode(uint32_t short_code);
@ -171,10 +171,10 @@ private:
// ------------------------------------------------------------------------------------- // // ------------------------------------------------------------------------------------- //
/// Serialize ShortCode into a 5-character string. /// Serialize ShortCode into a 5-character string.
static std::string string_encode(uint32_t short_code); static KLSK_INLINE std::string string_encode(uint32_t short_code);
/// Deserialize ShortCode from string and return nullopt on error. /// Deserialize ShortCode from string and return nullopt on error.
static std::optional<uint32_t> string_decode(std::string_view short_code); static KLSK_INLINE std::optional<uint32_t> string_decode(std::string_view short_code);
// ------------------------------------------------------------------------------------- // // ------------------------------------------------------------------------------------- //
@ -193,6 +193,9 @@ private:
// ------------------------------------------------------------------------------------- // // ------------------------------------------------------------------------------------- //
}; };
static_assert(std::is_standard_layout_v<ShortCode>);
static_assert(std::is_trivially_copyable_v<ShortCode>);
} // namespace klotski::codec } // namespace klotski::codec
#include "internal/short_code.inl" #include "internal/short_code.inl"

Loading…
Cancel
Save