Browse Source

docs: add more instructions of codec

master
Dnomd343 9 months ago
parent
commit
6b2910af81
  1. 71
      src/core/common_code/common_code.h
  2. 83
      src/core/raw_code/raw_code.h
  3. 131
      src/core/short_code/short_code.h

71
src/core/common_code/common_code.h

@ -1,15 +1,13 @@
#pragma once #pragma once
/// CommonCode is a generic klotski encoding that records a valid case using /// CommonCode is a generic klotski encoding that records a valid case using
/// 36-bit lengths, and stored in a `uint64_t`. /// 36-bit lengths, and stored in a `uint64_t`. Since there is only one `2x2`
/// block, it is encoded separately. Its upper-left corner is called `head`,
/// Since there is only one `2x2` block, it is encoded separately. Its upper /// which has 12 possible positions and is encoded using 4-bit length.
/// left corner is called `head`, it has 12 possible positions and is encoded
/// using 4-bit length (0 ~ 15).
/// ///
/// 00 01 02 03 /// 00 01 02 03
/// 04 05 06 07 00 01 02 /// 04 05 06 07 00 01 02
/// 08 09 10 11 04 05 06 <- head of 2x2 block /// 08 09 10 11 04 05 06 <- head of 2x2 block (4-bit)
/// 12 13 14 15 08 09 10 (without 03/07/11/15) /// 12 13 14 15 08 09 10 (without 03/07/11/15)
/// 16 17 18 19 12 13 14 /// 16 17 18 19 12 13 14
@ -22,36 +20,39 @@
/// ( # # ) | ( # ) | | /// ( # # ) | ( # ) | |
/// This sequence can have up to 16 blocks, aka 32-bit in length. Therefore, in /// This sequence can have up to 16 blocks, aka 32-bit in length. Therefore, in
/// order to be compatible with all cases, the length of this part of the code /// order to be compatible with all klotski cases, the length of this part of
/// is set to 32-bit. In addition, for the convenience of reading, it is /// the code is set to 32-bit. In addition, for the convenience of reading, it
/// stipulated that the sequence starts from the high bit, and the remaining /// is stipulated that the sequence starts from the high bit, and the remaining
/// bits should be filled with `0`. /// bits should be filled with `0`.
/// Putting the content of the `head` in the upper 4-bit, and the lower 32-bit /// Putting the content of the `head` in the upper 4-bit, and the lower 32-bit
/// to store the sequence content, a 36-bit length code can be obtained, which /// to store the sequence content, a 36-bit length code can be obtained, which
/// corresponds to any valid layout one-to-one. When CommonCode is converted /// corresponds to any valid case one-to-one. When CommonCode is converted into
/// into a string, just directly export the hexadecimal data, and get a 9-digit /// a string, just directly export the hexadecimal data, and get a 9-digit string
/// string encoding. Characters are not case-sensitive, but it is recommended /// encoding. Characters are not case-sensitive, but it is recommended to use
/// to use uppercase letters. In addition, the last `0` of the string is /// uppercase letters. In addition, the last `0` of the string is allowed to be
/// allowed to be omitted, and it can be completed to 9 digits when decoding, /// omitted, and it can be completed to 9 digits when decoding, but note that
/// but note that if the encoding is all `0`, it should be reduced to the /// if the encoding is all `0`, it should be reduced to a single `0`.
/// remaining one `0`.
/// ----------------------------------------------------------------------------------- ///
/// Eg1: /// Eg1: ///
/// % # # % 2x2 -> head = 1 /// % # # % 2x2 -> head = 1 ///
/// % # # % 2x1 2x1 2x1 1x2 2x1 1x1 1x1 1x1 space space 1x1 ... ... ... ... ... /// % # # % 2x1 2x1 2x1 1x2 2x1 1x1 1x1 1x1 space space 1x1 ... ... ... ... ... ///
/// @ $ $ @ 10 10 10 01 10 11 11 11 00 00 11 00 00 00 00 00 /// @ $ $ @ 10 10 10 01 10 11 11 11 00 00 11 00 00 00 00 00 ///
/// @ & * @ 1010 1001 1011 1111 0000 1100 0000 0000 /// @ & * @ 1010 1001 1011 1111 0000 1100 0000 0000 ///
/// * & A 9 B F 0 C 0 0 /// * & A 9 B F 0 C 0 0 ///
/// CommonCode = 0x1A9BF0C00 -> "1A9BF0C" /// CommonCode = 0x1A9BF0C00 -> "1A9BF0C" ///
/// ----------------------------------------------------------------------------------- ///
/// Eg2:
/// * @ & % 2x2 -> head = 4 /// ----------------------------------------------------------------------------------- ///
/// # # $ % 1x1 1x1 1x1 2x1 2x1 2x1 space 1x2 space 1x1 1x2 ... ... ... ... ... /// Eg2: ///
/// # # $ ^ 11 11 11 10 10 10 00 01 00 11 01 00 00 00 00 00 /// * @ & % 2x2 -> head = 4 ///
/// ~ ~ ^ 1111 1110 1010 0001 0011 0100 0000 0000 /// # # $ % 1x1 1x1 1x1 2x1 2x1 2x1 space 1x2 space 1x1 1x2 ... ... ... ... ... ///
/// @ % % F E A 1 3 4 0 0 /// # # $ ^ 11 11 11 10 10 10 00 01 00 11 01 00 00 00 00 00 ///
/// CommonCode = 0x4FEA13400 -> "4FEA134" /// ~ ~ ^ 1111 1110 1010 0001 0011 0100 0000 0000 ///
/// @ % % F E A 1 3 4 0 0 ///
/// CommonCode = 0x4FEA13400 -> "4FEA134" ///
/// ----------------------------------------------------------------------------------- ///
#include <string> #include <string>
#include <cstdint> #include <cstdint>
@ -134,10 +135,10 @@ inline CommonCode CommonCode::unsafe_create(uint64_t common_code) noexcept {
/// CommonCode create with valid check. /// CommonCode create with valid check.
inline std::optional<CommonCode> CommonCode::create(uint64_t common_code) noexcept { inline std::optional<CommonCode> CommonCode::create(uint64_t common_code) noexcept {
if (CommonCode::check(common_code)) { if (!CommonCode::check(common_code)) {
return CommonCode::unsafe_create(common_code); return std::nullopt;
} }
return std::nullopt; return CommonCode::unsafe_create(common_code);
} }
/// Output string encoding of CommonCode. /// Output string encoding of CommonCode.

83
src/core/raw_code/raw_code.h

@ -1,10 +1,10 @@
#pragma once #pragma once
/// RawCode is an uncompressed coding scheme, which is used for program /// RawCode is an uncompressed klotski coding scheme, which is used for program
/// calculation. It encodes a `5x4` chessboard as 0 ~ 19, and uses 3-bit to /// calculation. It encodes the `5x4` chessboard as 0 ~ 19, and uses 3-bit to
/// represent each position, occupying a total of 60-bit, and stored in a /// represent each position, occupying a total of 60-bit, and stores them in a
/// `uint64_t` variable. Among them, the upper 4-bit are reserved and filled /// `uint64_t` variable. In addition, the upper 4-bit of RawCode are reserved
/// with `0`. /// and must be filled with `0`.
/// ///
/// 00 01 02 03 /// 00 01 02 03
/// 04 05 06 07 fill 20 slots /// 04 05 06 07 fill 20 slots
@ -12,31 +12,52 @@
/// 12 13 14 15 (4b) + (3b) * 20 => 64-bit /// 12 13 14 15 (4b) + (3b) * 20 => 64-bit
/// 16 17 18 19 /// 16 17 18 19
/// Eg1: /// As we all know, 3-bit can represent 8 states. The upper-left corner of the
/// % # # % 2x1 2x2 ... 2x1 010 100 111 010 /// four blocks corresponds to 4 of them, and 2 more states are needed to mark
/// % # # % ... ... ... ... 111 111 111 111 /// spaces and fills. The remaining 2 states are reserved for now.
/// @ $ $ @ 2x1 1x2 ... 2x1 010 001 111 010
/// @ & * @ ... 1x1 1x1 ... 111 011 011 111
/// * & 1x1 0x0 0x0 1x1 011 000 000 011
/// ///
/// | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 /// ------------------------------------
/// 0000 | 011 000 000 011 | 111 011 011 111 | 010 111 001 010 | 111 111 111 111 | 010 111 100 010 /// | 000 -> space | 100 -> 2x2 |
/// 0000 | 0110 0000 0011 | 1110 1101 1111 | 0101 1100 1010 | 1111 1111 1111 | 0101 1110 0010 /// | 001 -> 1x2 | 101 -> [reserved] |
/// 0 6 0 3 E D F 5 C A F F F 5 E 2 /// | 010 -> 2x1 | 110 -> [reserved] |
/// => 0x0603'EDF5'CAFF'F5E2 /// | 011 -> 1x1 | 111 -> fill |
/// ------------------------------------
/// Eg2:
/// * @ & % 1x1 1x1 1x1 2x1 011 011 011 010
/// # # $ % 2x2 ... 2x1 ... 100 111 010 111
/// # # $ ^ ... ... ... 2x1 111 111 111 010
/// ~ ~ ^ 0x0 1x2 ... ... 000 001 111 111
/// @ % % 0x0 1x1 1x2 ... 000 011 001 111
/// ///
/// | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 /// Here, space is defined as `000` and fill is defined as `111`, which will
/// 0000 | 111 001 011 000 | 111 111 001 000 | 010 111 111 111 | 111 010 111 100 | 010 011 011 011 /// facilitate the execution of bit operations of the movement algorithm. Other
/// 0000 | 1110 0101 1000 | 1111 1100 1000 | 0101 1111 1111 | 1110 1011 1100 | 0100 1101 1011 /// block definitions will not affect the efficiency of the algorithm.
/// 0 E 5 8 F C 8 5 F F E B C 4 D B
/// => 0x0E58'FC85'FFEB'C4DB /// -------------------------------------------------------------------------------------------------- ///
/// Eg1: ///
/// % # # % 2x1 2x2 ... 2x1 010 100 111 010 ///
/// % # # % ... ... ... ... 111 111 111 111 ///
/// @ $ $ @ 2x1 1x2 ... 2x1 010 001 111 010 ///
/// @ & * @ ... 1x1 1x1 ... 111 011 011 111 ///
/// * & 1x1 0x0 0x0 1x1 011 000 000 011 ///
/// ///
/// | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 ///
/// 0000 | 011 000 000 011 | 111 011 011 111 | 010 111 001 010 | 111 111 111 111 | 010 111 100 010 ///
/// 0000 | 0110 0000 0011 | 1110 1101 1111 | 0101 1100 1010 | 1111 1111 1111 | 0101 1110 0010 ///
/// 0 6 0 3 E D F 5 C A F F F 5 E 2 ///
/// ///
/// RawCode => 0x0603'EDF5'CAFF'F5E2 ///
/// -------------------------------------------------------------------------------------------------- ///
/// -------------------------------------------------------------------------------------------------- ///
/// Eg2: ///
/// * @ & % 1x1 1x1 1x1 2x1 011 011 011 010 ///
/// # # $ % 2x2 ... 2x1 ... 100 111 010 111 ///
/// # # $ ^ ... ... ... 2x1 111 111 111 010 ///
/// ~ ~ ^ 0x0 1x2 ... ... 000 001 111 111 ///
/// @ % % 0x0 1x1 1x2 ... 000 011 001 111 ///
/// ///
/// | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 ///
/// 0000 | 111 001 011 000 | 111 111 001 000 | 010 111 111 111 | 111 010 111 100 | 010 011 011 011 ///
/// 0000 | 1110 0101 1000 | 1111 1100 1000 | 0101 1111 1111 | 1110 1011 1100 | 0100 1101 1011 ///
/// 0 E 5 8 F C 8 5 F F E B C 4 D B ///
/// ///
/// RawCode => 0x0E58'FC85'FFEB'C4DB ///
/// -------------------------------------------------------------------------------------------------- ///
#include <string> #include <string>
#include <ostream> #include <ostream>
@ -116,10 +137,10 @@ inline RawCode RawCode::unsafe_create(uint64_t raw_code) noexcept {
/// RawCode create with valid check. /// RawCode create with valid check.
inline std::optional<RawCode> RawCode::create(uint64_t raw_code) noexcept { inline std::optional<RawCode> RawCode::create(uint64_t raw_code) noexcept {
if (RawCode::check(raw_code)) { if (!RawCode::check(raw_code)) {
return RawCode::unsafe_create(raw_code); return std::nullopt;
} }
return std::nullopt; return RawCode::unsafe_create(raw_code);
} }
} // namespace codec } // namespace codec

131
src/core/short_code/short_code.h

@ -1,72 +1,88 @@
#pragma once #pragma once
/// ShortCode is a high-compression encoding scheme based on CommonCode. Since /// ShortCode is a high-compression encoding scheme based on CommonCode. Since
/// there are a total of 29334498 valid klotski layouts, arrange their /// there are a total of 29334498 valid klotski cases, arrange them from small
/// CommonCodes from small to large (36-bit positive integers), and use the /// to large by their CommonCodes (36-bit positive integers), and use the index
/// index as the ShortCode. /// as the ShortCode.
/// Therefore, the valid value of ShortCode is [0, 29334498), stored in /// Therefore, the valid value of ShortCode is [0, 29334498), which is stored in a
/// `uint32_t`. The goal of high compression ratio is to facilitate verbal /// `uint32_t` variable. The goal of high compression ratio is to facilitate
/// sharing, so it is necessary to represent it in a suitable string. Similar /// verbal sharing, so it is necessary to represent it as a suitable string.
/// to Bitcoin's `base58`, in ShortCode, 4 confusing characters `0` `O` `I` `l` /// Similar to Bitcoin's base58 encoding, in ShortCode, 4 confusing characters
/// are removed from 10 numbers and 26 characters, forming a private base32 /// `0` `O` `I` `l` are removed from 10 numbers and 26 characters, forming a
/// scheme. /// private base32 scheme.
/// Coincidentally, log(32, 29334498) is approximately equal to `4.96`, so /// Coincidentally, log(32, 29334498) is approximately equal to 4.96, so using
/// using 5-bit base32 can make good use of space, so any valid klotski layout /// 5-bit base32 can make good use of space, so any valid klotski case can be
/// can be represented by a 5-bit length code. As in CommonCode, the characters /// represented by a 5-bit length code. As in CommonCode, the characters here
/// here are case insensitive, but uppercase is still recommended. /// are case insensitive, but uppercase is still recommended.
/// ShortCode Convert Table /// Compared with the CommonCode, although ShortCode saves space, it completely
/// ------------------------------------------------- /// loses readability. The former can directly get the case without the help of
/// | 00 | 01 | 02 | 03 | 04 | 05 | 06 | 07 | /// a computer, while the latter is almost impossible to complete by the human
/// | `1` | `2` | `3` | `4` | `5` | `6` | `7` | `8` | /// brain. But anyway, ShortCode makes it easy to manually record the klotski
/// |-----------------------------------------------| /// cases, either verbally or handwritten.
/// | 08 | 09 | 10 | 11 | 12 | 13 | 14 | 15 |
/// | `9` | `A` | `B` | `C` | `D` | `E` | `F` | `G` | /// ShortCode Convert Table ///
/// |-----------------------------------------------| /// ------------------------------------------------- ///
/// | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | /// | 00 | 01 | 02 | 03 | 04 | 05 | 06 | 07 | ///
/// | `H` | `J` | `K` | `M` | `N` | `P` | `Q` | `R` | /// | `1` | `2` | `3` | `4` | `5` | `6` | `7` | `8` | ///
/// |-----------------------------------------------| /// |-----------------------------------------------| ///
/// | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | /// | 08 | 09 | 10 | 11 | 12 | 13 | 14 | 15 | ///
/// | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` | /// | `9` | `A` | `B` | `C` | `D` | `E` | `F` | `G` | ///
/// ------------------------------------------------- /// |-----------------------------------------------| ///
/// | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | ///
/// Eg1: /// | `H` | `J` | `K` | `M` | `N` | `P` | `Q` | `R` | ///
/// 0x1A9BF0C00 -> index 4091296 /// |-----------------------------------------------| ///
/// 4091296 = 3 * (32 ^ 4) + 28 * (32 ^ 3) + 27 * (32 ^ 2) + 13 * (32 ^ 1) + 0 * (32 ^ 0) /// | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | ///
/// => (3), (28), (27), (13), (0) /// | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` | ///
/// => `4`, `W`, `V`, `E`, `1` /// ------------------------------------------------- ///
/// => "4WVE1"
/// ------------------------------------------------------------------------------------------ ///
/// Eg2: /// Eg1: ///
/// 0x4FEA13400 -> index 10399732 /// % # # % ///
/// 10399732 = 9 * (32 ^ 4) + 29 * (32 ^ 3) + 11 * (32 ^ 2) + 31 * (32 ^ 1) + 20 * (32 ^ 0) /// % # # % ///
/// => (9), (29), (11), (31), (20) /// @ $ $ @ CommonCode = 0x1A9BF0C00 (index 4091296) ///
/// => `A`, `X`, `C`, `Z`, `N` /// @ & * @ ///
/// => "AXCZN" /// * & ///
/// ///
/// Compared with CommonCode, although ShortCode saves space, it completely /// 4091296 => 3 * (32 ^ 4) + 28 * (32 ^ 3) + 27 * (32 ^ 2) + 13 * (32 ^ 1) + 0 * (32 ^ 0) ///
/// loses readability. The former can directly get the layout without the help /// => (3), (28), (27), (13), (0) ///
/// of a computer, while the latter is almost impossible to complete by the /// => `4`, `W`, `V`, `E`, `1` ///
/// human brain. /// => "4WVE1" ///
/// ------------------------------------------------------------------------------------------ ///
/// -------------------------------------------------------------------------------------------- ///
/// Eg2: ///
/// * @ & % ///
/// # # $ % ///
/// # # $ ^ CommonCode = 0x4FEA13400 (index 10399732) ///
/// ~ ~ ^ ///
/// @ % % ///
/// ///
/// 10399732 => 9 * (32 ^ 4) + 29 * (32 ^ 3) + 11 * (32 ^ 2) + 31 * (32 ^ 1) + 20 * (32 ^ 0) ///
/// => (9), (29), (11), (31), (20) ///
/// => `A`, `X`, `C`, `Z`, `N` ///
/// => "AXCZN" ///
/// -------------------------------------------------------------------------------------------- ///
#include <string> #include <string>
#include <cstdint> #include <cstdint>
#include <ostream> #include <ostream>
#include <optional> #include <optional>
#include "all_cases.h"
namespace klotski { namespace klotski {
namespace codec { namespace codec {
const uint32_t SHORT_CODE_LIMIT = 29334498; constexpr uint32_t SHORT_CODE_LIMIT = cases::ALL_CASES_NUM_;
class CommonCode; class CommonCode;
class ShortCode { class ShortCode {
public: public:
static void speed_up(bool fast_mode);
explicit operator uint32_t() const noexcept; explicit operator uint32_t() const noexcept;
static bool check(uint32_t short_code) noexcept; static bool check(uint32_t short_code) noexcept;
static void speed_up(bool fast_mode = false) noexcept;
friend std::ostream& operator<<(std::ostream &out, ShortCode self); friend std::ostream& operator<<(std::ostream &out, ShortCode self);
[[nodiscard]] uint32_t unwrap() const noexcept; [[nodiscard]] uint32_t unwrap() const noexcept;
@ -90,10 +106,11 @@ public:
private: private:
uint32_t code_; uint32_t code_;
static bool fast_available_; static bool fast_available_; // TODO: try to remove it
static uint64_t fast_decode(uint32_t short_code) noexcept; static uint64_t fast_decode(uint32_t short_code) noexcept;
static uint32_t fast_encode(uint64_t common_code) noexcept; static uint32_t fast_encode(uint64_t common_code) noexcept;
static uint64_t tiny_decode(uint32_t short_code) noexcept; static uint64_t tiny_decode(uint32_t short_code) noexcept;
static uint32_t tiny_encode(uint64_t common_code) noexcept; static uint32_t tiny_encode(uint64_t common_code) noexcept;
@ -101,7 +118,7 @@ private:
static std::optional<uint32_t> string_decode(const std::string &short_code) noexcept; static std::optional<uint32_t> string_decode(const std::string &short_code) noexcept;
}; };
/// CommonCode compare implements. /// ShortCode compare implements.
inline bool operator==(uint32_t s1, ShortCode s2) noexcept { inline bool operator==(uint32_t s1, ShortCode s2) noexcept {
return s1 == s2.unwrap(); return s1 == s2.unwrap();
} }
@ -133,12 +150,12 @@ inline ShortCode ShortCode::unsafe_create(uint32_t short_code) noexcept {
return *reinterpret_cast<ShortCode*>(&short_code); // init directly return *reinterpret_cast<ShortCode*>(&short_code); // init directly
} }
/// CommonCode create with valid check. /// ShortCode create with valid check.
inline std::optional<ShortCode> ShortCode::create(uint32_t short_code) noexcept { inline std::optional<ShortCode> ShortCode::create(uint32_t short_code) noexcept {
if (ShortCode::check(short_code)) { if (!ShortCode::check(short_code)) {
return ShortCode::unsafe_create(short_code); return std::nullopt;
} }
return std::nullopt; return ShortCode::unsafe_create(short_code);
} }
/// Output string encoding of ShortCode. /// Output string encoding of ShortCode.

Loading…
Cancel
Save