Browse Source

feat: string serialize and deserialize of CommonCode

legacy
Dnomd343 1 year ago
parent
commit
d2ed1ff5bd
  1. 19
      src/core/CMakeLists.txt
  2. 1
      src/core/common_code/common_code.cc
  3. 141
      src/core/common_code/common_code.h
  4. 89
      src/core/common_code/convert.cc
  5. 79
      src/core/common_code/serialize.cc
  6. 42
      src/core/main.cc
  7. 9
      src/core/utils/utility.h
  8. 0
      src/core_test/codec/common_code.cc

19
src/core/CMakeLists.txt

@ -10,7 +10,22 @@ add_compile_options(-fno-exceptions)
include_directories(utils) include_directories(utils)
include_directories(all_cases) include_directories(all_cases)
include_directories(common_code)
add_library(${PROJECT_NAME} all_cases/basic_ranges.cc all_cases/all_cases.cc ffi/all_cases.cc) add_library(${PROJECT_NAME}
all_cases/basic_ranges.cc
all_cases/all_cases.cc
ffi/all_cases.cc
common_code/common_code.cc
common_code/serialize.cc
common_code/convert.cc
)
add_executable(${PROJECT_NAME}_cli main.cc all_cases/basic_ranges.cc all_cases/all_cases.cc) add_executable(${PROJECT_NAME}_cli main.cc
all_cases/basic_ranges.cc
all_cases/all_cases.cc
ffi/all_cases.cc
common_code/common_code.cc
common_code/serialize.cc
common_code/convert.cc
)

1
src/core/common_code/common_code.cc

@ -0,0 +1 @@
#include "common_code.h"

141
src/core/common_code/common_code.h

@ -0,0 +1,141 @@
#pragma once
/// CommonCode is a generic klotski encoding that records a valid case using
/// a 36-bit length, and is stored in a `uint64_t`.
/// Since there is only one `2x2` block, it is encoded separately. Its upper
/// left corner is called `head`, it has 12 possible positions and is encoded
/// using 4-bit length (0 ~ 15).
///
///   00 01 02 03
///   04 05 06 07    00 01 02
///   08 09 10 11    04 05 06  <- head of 2x2 block
///   12 13 14 15    08 09 10     (without 03/07/11/15)
///   16 17 18 19    12 13 14
/// Treat spaces as special blocks, there can be four kinds of blocks in total,
/// namely `space`, `1x2`, `2x1`, `1x1`. Each of them is represented by 2-bit,
/// which are `00` `01` `10` `11`. Arrange them according to their position and
/// size, and we can get a binary sequence.
///
///   ( 2x2 -> # # ) | ( 2x1 -> # ) | ( 1x2 -> # # ) | ( 1x1 -> # )
///   (        # # ) | (        # ) |                |
/// This sequence can have up to 16 blocks, aka 32-bit in length. Therefore, in
/// order to be compatible with all cases, the length of this part of the code
/// is set to 32-bit. In addition, for the convenience of reading, it is
/// stipulated that the sequence starts from the high bit, and the remaining
/// bits should be filled with `0`.
/// Putting the content of the `head` in the upper 4-bit, and the lower 32-bit
/// to store the sequence content, a 36-bit length code can be obtained, which
/// corresponds to any valid layout one-to-one. When CommonCode is converted
/// into a string, the hexadecimal data is exported directly, giving a
/// 9-character string encoding. Characters are not case-sensitive, but it is
/// recommended to use uppercase letters. In addition, the trailing `0`s of the
/// string are allowed to be omitted, and it is padded back to 9 characters
/// when decoding, but note that if the encoding is all `0`, it should be
/// reduced to a single remaining `0`.
/// Eg1:
///   % # # %                   2x2 -> head = 1
///   % # # %   2x1 2x1 2x1 1x2 2x1 1x1 1x1 1x1 space space 1x1 ... ... ... ... ...
///   @ $ $ @    10  10  10  01  10  11  11  11    00    00  11  00  00  00  00  00
///   @ & * @     1010    1001    1011    1111       0000     1100    0000    0000
///   *     &      A       9       B       F          0        C       0       0
///                       CommonCode = 0x1A9BF0C00 -> "1A9BF0C"
/// Eg2:
///   * @ & %                   2x2 -> head = 4
///   # # $ %   1x1 1x1 1x1 2x1 2x1 2x1 space 1x2 space 1x1 1x2 ... ... ... ... ...
///   # # $ ^    11  11  11  10  10  10    00  01    00  11  01  00  00  00  00  00
///     ~ ~ ^     1111    1110    1010      0001      0011    0100    0000    0000
///     @ % %      F       E       A         1         3       4       0       0
///                       CommonCode = 0x4FEA13400 -> "4FEA134"
#include <cstdint>
#include <iosfwd>
#include <optional>
#include <string>
#include <vector>
namespace klotski {
namespace codec {
class RawCode;
class ShortCode;
class CommonCode;
//typedef std::vector<RawCode> RawCodes;
//typedef std::vector<ShortCode> ShortCodes;
//typedef std::vector<CommonCode> CommonCodes;
/// Wrapper around a single CommonCode value, which is held in a `uint64_t`.
/// Default construction is disabled; instances come from the constructors or
/// from the static factory functions declared below.
class CommonCode {
public:
    /// CommonCode constructors
    CommonCode() = delete;
    explicit CommonCode(RawCode raw_code) noexcept;
    explicit CommonCode(ShortCode short_code) noexcept;

    /// Validity check
    static bool check(uint64_t common_code) noexcept;

    /// Access to the stored value
    constexpr uint64_t unwrap() const noexcept { return code_; }
    explicit operator uint64_t() const noexcept { return code_; }
    friend std::ostream& operator<<(std::ostream &out, const CommonCode &self);

    /// Export functions
    std::string to_string(bool shorten = false) const noexcept;
    RawCode to_raw_code() const noexcept;
    ShortCode to_short_code() const noexcept;

    /// CommonCode initializations from a `uint64_t` value
    static std::optional<CommonCode> create(uint64_t common_code) noexcept;
    static CommonCode unsafe_create(uint64_t common_code) noexcept; // no check

    /// CommonCode initializations from a string
    static std::optional<CommonCode> from_string(const std::string &common_code) noexcept;
    static std::optional<CommonCode> from_string(std::string &&common_code) noexcept;

    /// CommonCode initializations from RawCode
    static std::optional<CommonCode> from_raw_code(uint64_t raw_code) noexcept;
    static CommonCode from_raw_code(RawCode raw_code) noexcept;

    /// CommonCode initializations from ShortCode
    static std::optional<CommonCode> from_short_code(const std::string &short_code) noexcept;
    static std::optional<CommonCode> from_short_code(std::string &&short_code) noexcept;
    static std::optional<CommonCode> from_short_code(uint32_t short_code) noexcept;
    static CommonCode from_short_code(ShortCode short_code) noexcept;

    /// Batch conversions (not enabled yet)
    // static CommonCodes convert(const RawCodes &raw_codes) noexcept;
    // static CommonCodes convert(const ShortCodes &short_codes) noexcept;

    /// String serialize and deserialize on plain `uint64_t` values.
    /// NOTE(review): these are currently public; a commented-out private copy
    /// used to follow `code_`, so they may be meant to become private later.
    static std::optional<uint64_t> string_decode(const std::string &common_code) noexcept;
    static std::string string_encode(uint64_t common_code, bool shorten) noexcept;

private:
    uint64_t code_; // the stored CommonCode value
};
/// CommonCode create
//inline CommonCode CommonCode::create(uint64_t common_code) {
// return CommonCode(common_code); // with check
//}
/// Build a CommonCode from a `uint64_t` value without any validity check.
/// NOTE(review): the value is reinterpreted in place as a CommonCode object,
/// which relies on `code_` being the only data member of the class.
inline CommonCode CommonCode::unsafe_create(uint64_t common_code) noexcept {
    auto *as_common_code = reinterpret_cast<CommonCode*>(&common_code);
    return *as_common_code; // init directly
}
/// Compare implements
//inline bool operator==(uint64_t c1, const CommonCode &c2) noexcept { return c1 == c2.unwrap(); }
//inline bool operator==(const CommonCode &c1, uint64_t c2) noexcept { return c1.unwrap() == c2; }
//inline bool operator!=(uint64_t c1, const CommonCode &c2) noexcept { return !(c1 == c2); }
//inline bool operator!=(const CommonCode &c1, uint64_t c2) noexcept { return !(c1 == c2); }
//inline bool operator<(const CommonCode &c1, const CommonCode &c2) noexcept { return c1.unwrap() < c2.unwrap(); }
//inline bool operator>(const CommonCode &c1, const CommonCode &c2) noexcept { return c1.unwrap() > c2.unwrap(); }
//inline bool operator==(const CommonCode &c1, const CommonCode &c2) noexcept { return c1.unwrap() == c2.unwrap(); }
//inline bool operator!=(const CommonCode &c1, const CommonCode &c2) noexcept { return !(c1 == c2); }
} // namespace codec
} // namespace klotski

89
src/core/common_code/convert.cc

@ -0,0 +1,89 @@
#include "common_code.h"
namespace klotski {
namespace codec {
//using klotski::RawCode;
//using klotski::ShortCode;
//using klotski::CommonCode;
//
//using klotski::RawCodes;
//using klotski::ShortCode;
//using klotski::CommonCodes;
/// -------------------------- CommonCode to RawCode --------------------------
//RawCode CommonCode::to_raw_code() const noexcept {
// return RawCode(*this); // convert to raw code
//}
/// ------------------------- CommonCode to ShortCode -------------------------
//ShortCode CommonCode::to_short_code() const noexcept {
// return ShortCode(*this); // convert to short code
//}
/// -------------------------- RawCode to CommonCode --------------------------
//CommonCode CommonCode::from_raw_code(uint64_t raw_code) {
// return RawCode(raw_code).to_common_code();
//}
//
//CommonCode CommonCode::from_raw_code(RawCode &&raw_code) noexcept {
// return raw_code.to_common_code();
//}
//
//CommonCode CommonCode::from_raw_code(const RawCode &raw_code) noexcept {
// return raw_code.to_common_code();
//}
//
//CommonCode::CommonCode(RawCode &&raw_code) noexcept {
// code_ = raw_code.to_common_code().code_; // convert from raw code
//}
//
//CommonCode::CommonCode(const RawCode &raw_code) noexcept {
// code_ = raw_code.to_common_code().code_; // convert from raw code
//}
/// ------------------------- ShortCode to CommonCode -------------------------
//CommonCode::CommonCode(ShortCode &&short_code) noexcept {
// code_ = short_code.to_common_code().code_; // convert from short code
//}
//
//CommonCode::CommonCode(const ShortCode &short_code) noexcept {
// code_ = short_code.to_common_code().code_; // convert from short code
//}
//
//CommonCode CommonCode::from_short_code(uint32_t short_code) {
// return ShortCode(short_code).to_common_code();
//}
//
//CommonCode CommonCode::from_short_code(ShortCode &&short_code) noexcept {
// return short_code.to_common_code();
//}
//
//CommonCode CommonCode::from_short_code(std::string &&short_code) {
// return ShortCode(std::forward<std::string>(short_code)).to_common_code();
//}
//
//CommonCode CommonCode::from_short_code(const ShortCode &short_code) noexcept {
// return short_code.to_common_code();
//}
//
//CommonCode CommonCode::from_short_code(const std::string &short_code) {
// return ShortCode(short_code).to_common_code();
//}
/// ---------------------------- Batch conversions ----------------------------
//CommonCodes CommonCode::convert(const RawCodes &raw_codes) noexcept {
// return {raw_codes.begin(), raw_codes.end()};
//}
//
//CommonCodes CommonCode::convert(const ShortCodes &short_codes) noexcept {
// return {short_codes.begin(), short_codes.end()};
//}
} // namespace codec
} // namespace klotski

79
src/core/common_code/serialize.cc

@ -0,0 +1,79 @@
#include <string>
#include "common_code.h"
namespace klotski {
namespace codec {
/// Map a 4-bit value to its uppercase hexadecimal character: `0 ~ 9` become
/// `'0' ~ '9'` and `10 ~ 15` become `'A' ~ 'F'`. The callers in this file
/// always pass a value that is already masked to 4 bits.
inline static char to_hex_char(uint64_t hex_bit) {
    const bool is_letter = hex_bit >= 0xA;
    return is_letter ? static_cast<char>(hex_bit - 10 + 'A')
                     : static_cast<char>(hex_bit + '0');
}
/// Serialize CommonCode into a fixed-length string of 9 hexadecimal characters.
/// The low 36 bits are consumed 4 bits at a time, and the string is filled
/// from its last character backwards so the highest digit ends up first.
static std::string normal_encode(uint64_t common_code) {
    std::string result(9, '\0');
    for (auto pos = result.rbegin(); pos != result.rend(); ++pos) {
        *pos = to_hex_char(common_code & 0xF);
        common_code >>= 4;
    }
    return result;
}
/// Serialize CommonCode into a variable-length string, removing the trailing
/// zeros. The 9 hexadecimal digits of the low 36 bits are written from the
/// highest digit down, and the result is cut right after the last non-zero
/// digit. When every digit is zero the result is an empty string; the caller
/// `string_encode` handles that case itself before calling this function.
static std::string shorten_encode(uint64_t common_code) {
    char code_str[9];
    int kept_length = 0; // index just past the last non-zero digit seen so far
    for (int i = 0; i < 9; ++i) {
        const auto hex_bit = common_code >> (32 - i * 4) & 0b1111;
        code_str[i] = to_hex_char(hex_bit);
        if (hex_bit != 0) {
            kept_length = i + 1; // everything up to this digit must be kept
        }
    }
    return std::string{code_str, code_str + kept_length};
}
/// Serialize a CommonCode value to a string. Without `shorten` the result is
/// the output of `normal_encode`; with `shorten` it is the output of
/// `shorten_encode`, except that a zero value is returned as `"0"`.
std::string CommonCode::string_encode(uint64_t common_code, bool shorten) noexcept {
    if (!shorten) {
        return normal_encode(common_code);
    }
    // special case: an all-zero code keeps one remaining `0`
    return common_code == 0 ? "0" : shorten_encode(common_code);
}
/// Deserialize a CommonCode value from a hexadecimal string of 1 to 9
/// characters (upper or lower case). Returns std::nullopt when the length is
/// out of range or a character is not a hexadecimal digit. Strings shorter
/// than 9 characters are treated as if the missing low digits were `0`.
std::optional<uint64_t> CommonCode::string_decode(const std::string &common_code) noexcept {
    const auto length = common_code.length();
    if (length == 0 || length > 9) {
        return std::nullopt; // invalid string length
    }
    uint64_t value = 0;
    for (const char ch : common_code) {
        uint64_t digit = 0;
        if ('0' <= ch && ch <= '9') { // 0 ~ 9
            digit = ch - '0';
        } else if ('A' <= ch && ch <= 'F') { // A ~ F
            digit = ch - 'A' + 10;
        } else if ('a' <= ch && ch <= 'f') { // a ~ f
            digit = ch - 'a' + 10;
        } else {
            return std::nullopt; // invalid character
        }
        value = value << 4 | digit;
    }
    return value << (36 - length * 4); // low-bits fill with zero
}
std::string CommonCode::to_string(bool shorten) const noexcept {
return string_encode(code_, shorten);
}
} // namespace codec
} // namespace klotski

42
src/core/main.cc

@ -1,15 +1,55 @@
#include <iostream> #include <iostream>
#include "common_code.h"
#include "all_cases/all_cases.h" #include "all_cases/all_cases.h"
using klotski::cases::AllCases; using klotski::cases::AllCases;
using klotski::cases::BasicRanges; using klotski::cases::BasicRanges;
using klotski::codec::CommonCode;
int main() { int main() {
BasicRanges::Instance().Build(); // std::cout << (int)'0' << std::endl;
// std::cout << (int)'A' << std::endl;
// std::cout << CommonCode::string_encode(0x1A9BF0C00, false) << std::endl;
// std::cout << CommonCode::string_encode(0x0'10'00'00'00, false) << std::endl;
// return 0;
// printf("%09llX\n", CommonCode::string_decode("1A9BF0C").value());
// return 0;
// BasicRanges::Instance().Build();
AllCases::Instance().Build();
std::vector<uint64_t> common_codes;
common_codes.reserve(klotski::cases::ALL_CASES_NUM_);
for (uint64_t head = 0; head < 15; ++head) {
for (auto range : AllCases::Instance().Fetch()[head]) {
common_codes.emplace_back(head << 32 | range);
}
}
std::vector<std::string> common_codes_str;
common_codes_str.reserve(klotski::cases::ALL_CASES_NUM_);
for (auto x : common_codes) {
common_codes_str.emplace_back(CommonCode::string_encode(x, false));
}
auto start = clock(); auto start = clock();
// for (auto common_code : common_codes) {
// printf("%llX\n", common_code);
// CommonCode::string_encode(common_code, true);
// CommonCode::string_encode(common_code, false);
// printf("%s\n", CommonCode::string_encode(common_code, false).c_str());
// std::cout << CommonCode::string_encode(common_code, false) << std::endl;
// }
for (auto &common_code_str : common_codes_str) {
CommonCode::string_decode(common_code_str);
}
// BasicRanges::Instance().Build(); // BasicRanges::Instance().Build();
// AllCases::Instance().Build(); // AllCases::Instance().Build();

9
src/core/utils/utility.h

@ -13,6 +13,15 @@ namespace klotski {
inline int low_zero_num(uint32_t bin) { inline int low_zero_num(uint32_t bin) {
return __builtin_ctzl(bin); return __builtin_ctzl(bin);
// TODO: using (bin ^ (bin - 1)) when non-builtin
// WARN: be aware of serious performance issues
// return __builtin_popcount(~(bin ^ -bin)) - 1;
}
inline int low_zero_num(uint64_t bin) {
return __builtin_ctzll(bin);
// WARN: be aware of serious performance issues // WARN: be aware of serious performance issues
// return __builtin_popcount(~(bin ^ -bin)) - 1; // return __builtin_popcount(~(bin ^ -bin)) - 1;
} }

0
src/core_test/codec/common_code.cc

Loading…
Cancel
Save