From d2ed1ff5bda86823819233171d75638659df1a68 Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Mon, 2 Oct 2023 18:21:57 +0800 Subject: [PATCH] feat: string serialize and deserialize of CommonCode --- src/core/CMakeLists.txt | 19 +++- src/core/common_code/common_code.cc | 1 + src/core/common_code/common_code.h | 141 ++++++++++++++++++++++++++++ src/core/common_code/convert.cc | 89 ++++++++++++++++++ src/core/common_code/serialize.cc | 79 ++++++++++++++++ src/core/main.cc | 42 ++++++++- src/core/utils/utility.h | 9 ++ src/core_test/codec/common_code.cc | 0 8 files changed, 377 insertions(+), 3 deletions(-) create mode 100644 src/core/common_code/common_code.cc create mode 100644 src/core/common_code/common_code.h create mode 100644 src/core/common_code/convert.cc create mode 100644 src/core/common_code/serialize.cc create mode 100644 src/core_test/codec/common_code.cc diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 6a92dfe..4708e0e 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -10,7 +10,22 @@ add_compile_options(-fno-exceptions) include_directories(utils) include_directories(all_cases) +include_directories(common_code) -add_library(${PROJECT_NAME} all_cases/basic_ranges.cc all_cases/all_cases.cc ffi/all_cases.cc) +add_library(${PROJECT_NAME} + all_cases/basic_ranges.cc + all_cases/all_cases.cc + ffi/all_cases.cc + common_code/common_code.cc + common_code/serialize.cc + common_code/convert.cc +) -add_executable(${PROJECT_NAME}_cli main.cc all_cases/basic_ranges.cc all_cases/all_cases.cc) +add_executable(${PROJECT_NAME}_cli main.cc + all_cases/basic_ranges.cc + all_cases/all_cases.cc + ffi/all_cases.cc + common_code/common_code.cc + common_code/serialize.cc + common_code/convert.cc +) diff --git a/src/core/common_code/common_code.cc b/src/core/common_code/common_code.cc new file mode 100644 index 0000000..69ffd52 --- /dev/null +++ b/src/core/common_code/common_code.cc @@ -0,0 +1 @@ +#include "common_code.h" diff --git 
a/src/core/common_code/common_code.h b/src/core/common_code/common_code.h
new file mode 100644
index 0000000..e4cae2c
--- /dev/null
+++ b/src/core/common_code/common_code.h
@@ -0,0 +1,141 @@
+#pragma once
+
+/// CommonCode is a generic klotski encoding that records a valid case using
+/// 36 bits, stored in a `uint64_t`.
+
+/// Since there is only one `2x2` block, it is encoded separately. Its upper
+/// left corner is called `head`; it has 12 possible positions and is encoded
+/// in 4 bits (0 ~ 15).
+///
+///   00 01 02 03
+///   04 05 06 07    00 01 02
+///   08 09 10 11    04 05 06  <- head of 2x2 block
+///   12 13 14 15    08 09 10     (without 03/07/11/15)
+///   16 17 18 19    12 13 14
+
+/// Treat spaces as special blocks, there can be four kinds of blocks in total,
+/// namely `space`, `1x2`, `2x1`, `1x1`. Each of them is represented by 2 bits,
+/// which are `00` `01` `10` `11`. Arrange them according to their position and
+/// size, and we can get a binary sequence.
+///
+///   ( 2x2 -> # # ) | ( 2x1 -> # ) | ( 1x2 -> # # ) | ( 1x1 -> # )
+///   (        # # ) | (        # ) |                |
+
+/// This sequence can have up to 16 blocks, i.e. 32 bits in length. Therefore,
+/// in order to be compatible with all cases, the length of this part of the
+/// code is set to 32 bits. In addition, for the convenience of reading, it is
+/// stipulated that the sequence starts from the high bit, and the remaining
+/// bits should be filled with `0`.
+
+/// Putting the content of the `head` in the upper 4 bits, and using the lower
+/// 32 bits to store the sequence content, a 36-bit code can be obtained, which
+/// corresponds to any valid layout one-to-one. When CommonCode is converted
+/// into a string, just directly export the hexadecimal data, and get a 9-digit
+/// string encoding. Characters are not case-sensitive, but it is recommended
+/// to use uppercase letters. 
In addition, the last `0` of the string is
+/// allowed to be omitted, and it can be completed to 9 digits when decoding,
+/// but note that if the encoding is all `0`, it should be reduced to the
+/// remaining one `0`.
+
+/// Eg1:
+///   % # # %    2x2 -> head = 1
+///   % # # %    2x1 2x1 2x1 1x2 2x1 1x1 1x1 1x1 space space 1x1 ... ... ... ... ...
+///   @ $ $ @     10  10  10  01  10  11  11  11    00    00  11  00  00  00  00  00
+///   @ & * @       1010    1001    1011    1111        0000    1100    0000    0000
+///   *     &         A       9       B       F           0       C       0       0
+///                         CommonCode = 0x1A9BF0C00 -> "1A9BF0C"
+
+/// Eg2:
+///   * @ & %    2x2 -> head = 4
+///   # # $ %    1x1 1x1 1x1 2x1 2x1 2x1 space 1x2 space 1x1 1x2 ... ... ... ... ...
+///   # # $ ^     11  11  11  10  10  10    00  01    00  11  01  00  00  00  00  00
+///   ~ ~   ^       1111    1110    1010      0001      0011    0100    0000    0000
+///   @ % %           F       E       A         1         3       4       0       0
+///                         CommonCode = 0x4FEA13400 -> "4FEA134"
+
+#include
+#include
+#include
+
+namespace klotski {
+namespace codec {
+
+class RawCode;
+class ShortCode;
+class CommonCode;
+
+//typedef std::vector RawCodes;
+//typedef std::vector ShortCodes;
+//typedef std::vector CommonCodes;
+
+class CommonCode {
+public:
+    /// Validity check of a `uint64_t` value as CommonCode
+    static bool check(uint64_t common_code) noexcept;
+
+    /// Operators of CommonCode
+    explicit operator uint64_t() const noexcept { return code_; } // same value as unwrap()
+    friend std::ostream& operator<<(std::ostream &out, const CommonCode &self);
+
+    /// Export functions
+    RawCode to_raw_code() const noexcept;
+    ShortCode to_short_code() const noexcept;
+    std::string to_string(bool shorten = false) const noexcept;
+    constexpr uint64_t unwrap() const noexcept { return code_; } // stored code value
+
+    /// CommonCode constructors (no default construction)
+    CommonCode() = delete;
+    explicit CommonCode(RawCode raw_code) noexcept;
+    explicit CommonCode(ShortCode short_code) noexcept;
+
+    /// CommonCode initializations
+    static CommonCode unsafe_create(uint64_t common_code) noexcept; // no validity check
+    static std::optional create(uint64_t common_code) noexcept;
+
+    static std::optional from_string(std::string &&common_code) noexcept;
+    static std::optional from_string(const std::string &common_code) noexcept;
+
+    static CommonCode from_raw_code(RawCode raw_code) noexcept;
+    static std::optional from_raw_code(uint64_t raw_code) noexcept;
+
+    static CommonCode from_short_code(ShortCode short_code) noexcept;
+    static std::optional from_short_code(uint32_t short_code) noexcept;
+    static std::optional from_short_code(std::string &&short_code) noexcept;
+    static std::optional from_short_code(const std::string &short_code) noexcept;
+
+    /// Batch conversions (currently disabled)
+//    static CommonCodes convert(const RawCodes &raw_codes) noexcept;
+//    static CommonCodes convert(const ShortCodes &short_codes) noexcept;
+
+    static std::string string_encode(uint64_t common_code, bool shorten) noexcept; // public here; private copy is commented out below
+    static std::optional string_decode(const std::string &common_code) noexcept; // public here; private copy is commented out below
+
+private:
+    uint64_t code_; // the only data member
+//    static std::string string_encode(uint64_t common_code, bool shorten) noexcept;
+//    static std::optional string_decode(const std::string &common_code) noexcept;
+};
+
+/// CommonCode create
+//inline CommonCode CommonCode::create(uint64_t common_code) {
+//    return CommonCode(common_code); // with check
+//}
+
+/// CommonCode create without check: the `uint64_t` is reinterpreted as a CommonCode object.
+inline CommonCode CommonCode::unsafe_create(uint64_t common_code) noexcept {
+    return *(CommonCode*)&common_code; // init directly; NOTE(review): C-style pointer cast that relies on `code_` being the only member -- likely a strict-aliasing violation, confirm
+}
+
+/// Compare implements (currently disabled)
+//inline bool operator==(uint64_t c1, const CommonCode &c2) noexcept { return c1 == c2.unwrap(); }
+//inline bool operator==(const CommonCode &c1, uint64_t c2) noexcept { return c1.unwrap() == c2; }
+//inline bool operator!=(uint64_t c1, const CommonCode &c2) noexcept { return !(c1 == c2); }
+//inline bool operator!=(const CommonCode &c1, uint64_t c2) noexcept { return !(c1 == c2); }
+
+//inline bool operator<(const CommonCode &c1, const CommonCode &c2) noexcept { return c1.unwrap() < c2.unwrap(); }
+//inline bool operator>(const CommonCode &c1, const CommonCode &c2) noexcept { return c1.unwrap() > c2.unwrap(); }
+//inline bool 
operator==(const CommonCode &c1, const CommonCode &c2) noexcept { return c1.unwrap() == c2.unwrap(); } +//inline bool operator!=(const CommonCode &c1, const CommonCode &c2) noexcept { return !(c1 == c2); } + +} // namespace codec +} // namespace klotski diff --git a/src/core/common_code/convert.cc b/src/core/common_code/convert.cc new file mode 100644 index 0000000..1c8d633 --- /dev/null +++ b/src/core/common_code/convert.cc @@ -0,0 +1,89 @@ +#include "common_code.h" + +namespace klotski { +namespace codec { + +//using klotski::RawCode; +//using klotski::ShortCode; +//using klotski::CommonCode; +// +//using klotski::RawCodes; +//using klotski::ShortCode; +//using klotski::CommonCodes; + +/// -------------------------- CommonCode to RawCode -------------------------- + +//RawCode CommonCode::to_raw_code() const noexcept { +// return RawCode(*this); // convert to raw code +//} + +/// ------------------------- CommonCode to ShortCode ------------------------- + +//ShortCode CommonCode::to_short_code() const noexcept { +// return ShortCode(*this); // convert to short code +//} + +/// -------------------------- RawCode to CommonCode -------------------------- + +//CommonCode CommonCode::from_raw_code(uint64_t raw_code) { +// return RawCode(raw_code).to_common_code(); +//} +// +//CommonCode CommonCode::from_raw_code(RawCode &&raw_code) noexcept { +// return raw_code.to_common_code(); +//} +// +//CommonCode CommonCode::from_raw_code(const RawCode &raw_code) noexcept { +// return raw_code.to_common_code(); +//} +// +//CommonCode::CommonCode(RawCode &&raw_code) noexcept { +// code_ = raw_code.to_common_code().code_; // convert from raw code +//} +// +//CommonCode::CommonCode(const RawCode &raw_code) noexcept { +// code_ = raw_code.to_common_code().code_; // convert from raw code +//} + +/// ------------------------- ShortCode to CommonCode ------------------------- + +//CommonCode::CommonCode(ShortCode &&short_code) noexcept { +// code_ = short_code.to_common_code().code_; 
// convert from short code
+//}
+//
+//CommonCode::CommonCode(const ShortCode &short_code) noexcept {
+//    code_ = short_code.to_common_code().code_; // convert from short code
+//}
+//
+//CommonCode CommonCode::from_short_code(uint32_t short_code) {
+//    return ShortCode(short_code).to_common_code();
+//}
+//
+//CommonCode CommonCode::from_short_code(ShortCode &&short_code) noexcept {
+//    return short_code.to_common_code();
+//}
+//
+//CommonCode CommonCode::from_short_code(std::string &&short_code) {
+//    return ShortCode(std::forward(short_code)).to_common_code();
+//}
+//
+//CommonCode CommonCode::from_short_code(const ShortCode &short_code) noexcept {
+//    return short_code.to_common_code();
+//}
+//
+//CommonCode CommonCode::from_short_code(const std::string &short_code) {
+//    return ShortCode(short_code).to_common_code();
+//}
+
+/// ---------------------------- Batch conversions ----------------------------
+
+//CommonCodes CommonCode::convert(const RawCodes &raw_codes) noexcept {
+//    return {raw_codes.begin(), raw_codes.end()};
+//}
+//
+//CommonCodes CommonCode::convert(const ShortCodes &short_codes) noexcept {
+//    return {short_codes.begin(), short_codes.end()};
+//}
+
+} // namespace codec
+} // namespace klotski
diff --git a/src/core/common_code/serialize.cc b/src/core/common_code/serialize.cc
new file mode 100644
index 0000000..cedfc5d
--- /dev/null
+++ b/src/core/common_code/serialize.cc
@@ -0,0 +1,79 @@
+#include
+#include "common_code.h"
+
+namespace klotski {
+namespace codec {
+
+/// Convert a single hexadecimal digit (0 ~ 15) to its uppercase character; larger values are not checked.
+inline static char to_hex_char(uint64_t hex_bit) {
+    if (hex_bit < 0xA) {
+        return char(hex_bit + '0'); // 0 ~ 9 -> '0' ~ '9'
+    }
+    return char(hex_bit + 'A' - 10); // 10 ~ 15 -> 'A' ~ 'F'
+}
+
+/// Serialize CommonCode into a fixed 9-digit hexadecimal string. 
+static std::string normal_encode(uint64_t common_code) {
+    char code_str[9]; // 36-bit code -> nine 4-bit hexadecimal digits
+    for (int i = 0; i < 9; ++i) {
+        code_str[8 - i] = to_hex_char(common_code & 0b1111); // fill from the lowest digit backwards
+        common_code >>= 4;
+    }
+    return std::string{code_str, code_str + 9};
+}
+
+/// Serialize CommonCode into a variable-length string, removing the trailing zeros.
+/// An all-zero code yields an empty string here; `string_encode` maps that case to "0".
+static std::string shorten_encode(uint64_t common_code) {
+    int length = 0; // digits to keep: one past the last non-zero digit
+    char code_str[9];
+    for (int i = 0; i < 9; ++i) {
+        const auto hex_bit = common_code >> (32 - i * 4) & 0b1111; // i-th digit from the high side
+        code_str[i] = to_hex_char(hex_bit);
+        if (hex_bit != 0) {
+            length = i + 1; // initialized above, so a leading `0` digit (head 0) never reads garbage
+        }
+    }
+    return std::string{code_str, code_str + length};
+}
+
+/// Serialize CommonCode to string by shorten option.
+/// With `shorten` set, trailing `0` digits are dropped and a zero code becomes "0".
+std::string CommonCode::string_encode(uint64_t common_code, bool shorten) noexcept {
+    if (shorten) {
+        if (common_code == 0) {
+            return "0"; // special case
+        }
+        return shorten_encode(common_code);
+    }
+    return normal_encode(common_code);
+}
+
+/// Deserialize CommonCode from string and return std::nullopt on error. 
+std::optional CommonCode::string_decode(const std::string &common_code) noexcept { // accepts 1 ~ 9 hex digits, either case; the decoded value is not validated here
+//uint64_t CommonCode::string_decode(const std::string &common_code) noexcept {
+    if (common_code.length() > 9 || common_code.empty()) {
+        return std::nullopt; // invalid string length
+//        return 0;
+    }
+    uint64_t result = 0;
+    for (auto hex_bit : common_code) { // accumulate 4 bits per character, highest digit first
+        if (hex_bit >= '0' && hex_bit <= '9') { // 0 ~ 9
+            (result <<= 4) |= (hex_bit - '0');
+        } else if (hex_bit >= 'A' && hex_bit <= 'F') { // A ~ F
+            (result <<= 4) |= (hex_bit - 'A' + 10);
+        } else if (hex_bit >= 'a' && hex_bit <= 'f') { // a ~ f
+            (result <<= 4) |= (hex_bit - 'a' + 10);
+        } else {
+            return std::nullopt; // invalid character
+        }
+    }
+    return result << (36 - common_code.length() * 4); // left-align to 36 bits: omitted trailing digits become zero
+}
+
+std::string CommonCode::to_string(bool shorten) const noexcept { // forwards the stored code to string_encode
+    return string_encode(code_, shorten);
+}
+
+} // namespace codec
+} // namespace klotski
diff --git a/src/core/main.cc b/src/core/main.cc
index 0abd5ed..695662c 100644
--- a/src/core/main.cc
+++ b/src/core/main.cc
@@ -1,15 +1,55 @@
 #include
+#include "common_code.h"
 #include "all_cases/all_cases.h"
 
 using klotski::cases::AllCases;
 using klotski::cases::BasicRanges;
 
+using klotski::codec::CommonCode;
+
 int main() {
-    BasicRanges::Instance().Build();
+//    std::cout << (int)'0' << std::endl;
+//    std::cout << (int)'A' << std::endl;
+//    std::cout << CommonCode::string_encode(0x1A9BF0C00, false) << std::endl;
+//    std::cout << CommonCode::string_encode(0x0'10'00'00'00, false) << std::endl;
+//    return 0;
+
+//    printf("%09llX\n", CommonCode::string_decode("1A9BF0C").value());
+//    return 0;
+
+//    BasicRanges::Instance().Build();
+    AllCases::Instance().Build();
+
+    std::vector common_codes;
+    common_codes.reserve(klotski::cases::ALL_CASES_NUM_);
+
+    for (uint64_t head = 0; head < 15; ++head) { // heads 0 ~ 14 only
+        for (auto range : AllCases::Instance().Fetch()[head]) {
+            common_codes.emplace_back(head << 32 | range); // head placed above bit 32, range OR-ed into the low bits
+        }
+    }
+
+    std::vector common_codes_str;
+    common_codes_str.reserve(klotski::cases::ALL_CASES_NUM_);
+    for (auto x : common_codes) {
+        common_codes_str.emplace_back(CommonCode::string_encode(x, false)); // strings prepared before `start` is taken
+    }
 
     auto start = clock();
 
+//    for (auto common_code : common_codes) {
+//        printf("%llX\n", common_code);
+//        CommonCode::string_encode(common_code, true);
+//        CommonCode::string_encode(common_code, false);
+//        printf("%s\n", CommonCode::string_encode(common_code, false).c_str());
+//        std::cout << CommonCode::string_encode(common_code, false) << std::endl;
+//    }
+
+    for (auto &common_code_str : common_codes_str) {
+        CommonCode::string_decode(common_code_str); // result is discarded
+    }
+
 //    BasicRanges::Instance().Build();
 //    AllCases::Instance().Build();
 
diff --git a/src/core/utils/utility.h b/src/core/utils/utility.h
index e3fa8c6..03c57f5 100644
--- a/src/core/utils/utility.h
+++ b/src/core/utils/utility.h
@@ -13,6 +13,15 @@ namespace klotski {
 
 inline int low_zero_num(uint32_t bin) {
     return __builtin_ctzl(bin);
+    // TODO: using (bin ^ (bin - 1)) when non-builtin
+
+    // WARN: be aware of serious performance issues
+    // return __builtin_popcount(~(bin ^ -bin)) - 1;
+}
+
+inline int low_zero_num(uint64_t bin) { // 64-bit overload
+    return __builtin_ctzll(bin); // NOTE(review): GCC documents the result as undefined for bin == 0 -- confirm callers never pass 0
+
     // WARN: be aware of serious performance issues
     // return __builtin_popcount(~(bin ^ -bin)) - 1;
 }
diff --git a/src/core_test/codec/common_code.cc b/src/core_test/codec/common_code.cc
new file mode 100644
index 0000000..e69de29