From 6b2910af810f4f329d1c5b32184d8dfbea70666a Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Wed, 4 Oct 2023 01:25:24 +0800 Subject: [PATCH] docs: add more instructions of codec --- src/core/common_code/common_code.h | 71 ++++++++-------- src/core/raw_code/raw_code.h | 83 +++++++++++------- src/core/short_code/short_code.h | 131 ++++++++++++++++------------- 3 files changed, 162 insertions(+), 123 deletions(-) diff --git a/src/core/common_code/common_code.h b/src/core/common_code/common_code.h index 19c4055..6522ac0 100644 --- a/src/core/common_code/common_code.h +++ b/src/core/common_code/common_code.h @@ -1,15 +1,13 @@ #pragma once /// CommonCode is a generic klotski encoding that records an valid case using -/// 36-bit lengths, and stored in a `uint64_t`. - -/// Since there is only one `2x2` block, it is encoded separately. Its upper -/// left corner is called `head`, it has 12 possible positions and is encoded -/// using 4-bit length (0 ~ 15). +/// 36-bit lengths, and stored in a `uint64_t`. Since there is only one `2x2` +/// block, it is encoded separately. Its upper-left corner is called `head`, +/// which has 12 possible positions and is encoded using 4-bit length. /// /// 00 01 02 03 /// 04 05 06 07 00 01 02 -/// 08 09 10 11 04 05 06 <- head of 2x2 block +/// 08 09 10 11 04 05 06 <- head of 2x2 block (4-bit) /// 12 13 14 15 08 09 10 (without 03/07/11/15) /// 16 17 18 19 12 13 14 @@ -22,36 +20,39 @@ /// ( # # ) | ( # ) | | /// This sequence can have up to 16 blocks, aka 32-bit in length. Therefore, in -/// order to be compatible with all cases, the length of this part of the code -/// is set to 32-bit. In addition, for the convenience of reading, it is -/// stipulated that the sequence starts from the high bit, and the remaining +/// order to be compatible with all klotski cases, the length of this part of +/// the code is set to 32-bit. 
In addition, for the convenience of reading, it +/// is stipulated that the sequence starts from the high bit, and the remaining /// bits should be filled with `0`. /// Putting the content of the `head` in the upper 4-bit, and the lower 32-bit /// to store the sequence content, a 36-bit length code can be obtained, which -/// corresponds to any valid layout one-to-one. When CommonCode is converted -/// into a string, just directly export the hexadecimal data, and get a 9-bit -/// string encoding. Characters are not case-sensitive, but it is recommended -/// to use uppercase letters. In addition, the last `0` of the string is -/// allowed to be omitted, and it can be completed to 9 digits when decoding, -/// but note that if the encoding is all `0`, it should be reduced to the -/// remaining one `0`. - -/// Eg1: -/// % # # % 2x2 -> head = 1 -/// % # # % 2x1 2x1 2x1 1x2 2x1 1x1 1x1 1x1 space space 1x1 ... ... ... ... ... -/// @ $ $ @ 10 10 10 01 10 11 11 11 00 00 11 00 00 00 00 00 -/// @ & * @ 1010 1001 1011 1111 0000 1100 0000 0000 -/// * & A 9 B F 0 C 0 0 -/// CommonCode = 0x1A9BF0C00 -> "1A9BF0C" - -/// Eg2: -/// * @ & % 2x2 -> head = 4 -/// # # $ % 1x1 1x1 1x1 2x1 2x1 2x1 space 1x2 space 1x1 1x2 ... ... ... ... ... -/// # # $ ^ 11 11 11 10 10 10 00 01 00 11 01 00 00 00 00 00 -/// ~ ~ ^ 1111 1110 1010 0001 0011 0100 0000 0000 -/// @ % % F E A 1 3 4 0 0 -/// CommonCode = 0x4FEA13400 -> "4FEA134" +/// corresponds to any valid case one-to-one. When CommonCode is converted into +/// a string, just directly export the hexadecimal data, and get a 9-bit string +/// encoding. Characters are not case-sensitive, but it is recommended to use +/// uppercase letters. In addition, trailing `0`s of the string are allowed to +/// be omitted, and it can be completed to 9 digits when decoding, but note +/// that if the encoding is all `0`, it should be reduced to a single `0`. 
+ +/// ----------------------------------------------------------------------------------- /// +/// Eg1: /// +/// % # # % 2x2 -> head = 1 /// +/// % # # % 2x1 2x1 2x1 1x2 2x1 1x1 1x1 1x1 space space 1x1 ... ... ... ... ... /// +/// @ $ $ @ 10 10 10 01 10 11 11 11 00 00 11 00 00 00 00 00 /// +/// @ & * @ 1010 1001 1011 1111 0000 1100 0000 0000 /// +/// * & A 9 B F 0 C 0 0 /// +/// CommonCode = 0x1A9BF0C00 -> "1A9BF0C" /// +/// ----------------------------------------------------------------------------------- /// + +/// ----------------------------------------------------------------------------------- /// +/// Eg2: /// +/// * @ & % 2x2 -> head = 4 /// +/// # # $ % 1x1 1x1 1x1 2x1 2x1 2x1 space 1x2 space 1x1 1x2 ... ... ... ... ... /// +/// # # $ ^ 11 11 11 10 10 10 00 01 00 11 01 00 00 00 00 00 /// +/// ~ ~ ^ 1111 1110 1010 0001 0011 0100 0000 0000 /// +/// @ % % F E A 1 3 4 0 0 /// +/// CommonCode = 0x4FEA13400 -> "4FEA134" /// +/// ----------------------------------------------------------------------------------- /// #include #include @@ -134,10 +135,10 @@ inline CommonCode CommonCode::unsafe_create(uint64_t common_code) noexcept { /// CommonCode create with valid check. inline std::optional CommonCode::create(uint64_t common_code) noexcept { - if (CommonCode::check(common_code)) { - return CommonCode::unsafe_create(common_code); + if (!CommonCode::check(common_code)) { + return std::nullopt; } - return std::nullopt; + return CommonCode::unsafe_create(common_code); } /// Output string encoding of CommonCode. diff --git a/src/core/raw_code/raw_code.h b/src/core/raw_code/raw_code.h index eef5140..bd23e7a 100644 --- a/src/core/raw_code/raw_code.h +++ b/src/core/raw_code/raw_code.h @@ -1,10 +1,10 @@ #pragma once -/// RawCode is an uncompressed coding scheme, which is used for program -/// calculation. It encodes a `5x4` chessboard as 0 ~ 19, and uses 3-bit to -/// represent each position, occupying a total of 60-bit, and stored in a -/// `uint64_t` variable. 
Among them, the upper 4-bit are reserved and filled -/// with `0`. +/// RawCode is an uncompressed klotski coding scheme, which is used for program +/// calculation. It encodes the `5x4` chessboard as 0 ~ 19, and uses 3-bit to +/// represent each position, occupying a total of 60-bit, and stores them in a +/// `uint64_t` variable. In addition, the upper 4-bit of RawCode are reserved +/// and must be filled with `0`. /// /// 00 01 02 03 /// 04 05 06 07 fill 20 slots @@ -12,31 +12,52 @@ /// 12 13 14 15 (4b) + (3b) * 20 => 64-bit /// 16 17 18 19 -/// Eg1: -/// % # # % 2x1 2x2 ... 2x1 010 100 111 010 -/// % # # % ... ... ... ... 111 111 111 111 -/// @ $ $ @ 2x1 1x2 ... 2x1 010 001 111 010 -/// @ & * @ ... 1x1 1x1 ... 111 011 011 111 -/// * & 1x1 0x0 0x0 1x1 011 000 000 011 +/// As we all know, 3-bit can represent 8 states. The upper-left corner of the +/// four blocks corresponds to 4 of them, and 2 more states are needed to mark +/// spaces and fills. The remaining 2 states are reserved for now. /// -/// | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 -/// 0000 | 011 000 000 011 | 111 011 011 111 | 010 111 001 010 | 111 111 111 111 | 010 111 100 010 -/// 0000 | 0110 0000 0011 | 1110 1101 1111 | 0101 1100 1010 | 1111 1111 1111 | 0101 1110 0010 -/// 0 6 0 3 E D F 5 C A F F F 5 E 2 -/// => 0x0603'EDF5'CAFF'F5E2 - -/// Eg2: -/// * @ & % 1x1 1x1 1x1 2x1 011 011 011 010 -/// # # $ % 2x2 ... 2x1 ... 100 111 010 111 -/// # # $ ^ ... ... ... 2x1 111 111 111 010 -/// ~ ~ ^ 0x0 1x2 ... ... 000 001 111 111 -/// @ % % 0x0 1x1 1x2 ... 
000 011 001 111 +/// ------------------------------------ +/// | 000 -> space | 100 -> 2x2 | +/// | 001 -> 1x2 | 101 -> [reserved] | +/// | 010 -> 2x1 | 110 -> [reserved] | +/// | 011 -> 1x1 | 111 -> fill | +/// ------------------------------------ /// -/// | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 -/// 0000 | 111 001 011 000 | 111 111 001 000 | 010 111 111 111 | 111 010 111 100 | 010 011 011 011 -/// 0000 | 1110 0101 1000 | 1111 1100 1000 | 0101 1111 1111 | 1110 1011 1100 | 0100 1101 1011 -/// 0 E 5 8 F C 8 5 F F E B C 4 D B -/// => 0x0E58'FC85'FFEB'C4DB +/// Here, space is defined as `000` and fill is defined as `111`, which will +/// facilitate the execution of bit operations of the movement algorithm. Other +/// block definitions will not affect the efficiency of the algorithm. + +/// -------------------------------------------------------------------------------------------------- /// +/// Eg1: /// +/// % # # % 2x1 2x2 ... 2x1 010 100 111 010 /// +/// % # # % ... ... ... ... 111 111 111 111 /// +/// @ $ $ @ 2x1 1x2 ... 2x1 010 001 111 010 /// +/// @ & * @ ... 1x1 1x1 ... 111 011 011 111 /// +/// * & 1x1 0x0 0x0 1x1 011 000 000 011 /// +/// /// +/// | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 /// +/// 0000 | 011 000 000 011 | 111 011 011 111 | 010 111 001 010 | 111 111 111 111 | 010 111 100 010 /// +/// 0000 | 0110 0000 0011 | 1110 1101 1111 | 0101 1100 1010 | 1111 1111 1111 | 0101 1110 0010 /// +/// 0 6 0 3 E D F 5 C A F F F 5 E 2 /// +/// /// +/// RawCode => 0x0603'EDF5'CAFF'F5E2 /// +/// -------------------------------------------------------------------------------------------------- /// + +/// -------------------------------------------------------------------------------------------------- /// +/// Eg2: /// +/// * @ & % 1x1 1x1 1x1 2x1 011 011 011 010 /// +/// # # $ % 2x2 ... 2x1 ... 100 111 010 111 /// +/// # # $ ^ ... ... ... 2x1 111 111 111 010 /// +/// ~ ~ ^ 0x0 1x2 ... ... 
000 001 111 111 /// +/// @ % % 0x0 1x1 1x2 ... 000 011 001 111 /// +/// /// +/// | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 /// +/// 0000 | 111 001 011 000 | 111 111 001 000 | 010 111 111 111 | 111 010 111 100 | 010 011 011 011 /// +/// 0000 | 1110 0101 1000 | 1111 1100 1000 | 0101 1111 1111 | 1110 1011 1100 | 0100 1101 1011 /// +/// 0 E 5 8 F C 8 5 F F E B C 4 D B /// +/// /// +/// RawCode => 0x0E58'FC85'FFEB'C4DB /// +/// -------------------------------------------------------------------------------------------------- /// #include #include @@ -116,10 +137,10 @@ inline RawCode RawCode::unsafe_create(uint64_t raw_code) noexcept { /// RawCode create with valid check. inline std::optional RawCode::create(uint64_t raw_code) noexcept { - if (RawCode::check(raw_code)) { - return RawCode::unsafe_create(raw_code); + if (!RawCode::check(raw_code)) { + return std::nullopt; } - return std::nullopt; + return RawCode::unsafe_create(raw_code); } } // namespace codec diff --git a/src/core/short_code/short_code.h b/src/core/short_code/short_code.h index d53b556..785aa76 100644 --- a/src/core/short_code/short_code.h +++ b/src/core/short_code/short_code.h @@ -1,72 +1,88 @@ #pragma once /// ShortCode is a high-compression encoding scheme based on CommonCode. Since -/// there are a total of 29334498 valid klotski layouts, arrange their -/// CommonCodes from small to large (36-bit positive integers), and use the -/// index as the ShortCode. - -/// Therefore, the valid value of ShortCode is [0, 29334498), stored in -/// `uint32_t`. The goal of high compression ratio is to facilitate verbal -/// sharing, so it is necessary to represent it in a suitable string. Similar -/// to Bitcoin's `base58`, in ShortCode, 4 confusing characters `0` `O` `I` `l` -/// are removed from 10 numbers and 26 characters, forming a private base32 -/// scheme. 
- -/// Coincidentally, log(32, 29334498) is approximately equal to `4.96`, so -/// using 5-bit base32 can make good use of space, so any valid klotski layout -/// can be represented by a 5-bit length code. As in CommonCode, the characters -/// here are case insensitive, but uppercase is still recommended. - -/// ShortCode Convert Table -/// ------------------------------------------------- -/// | 00 | 01 | 02 | 03 | 04 | 05 | 06 | 07 | -/// | `1` | `2` | `3` | `4` | `5` | `6` | `7` | `8` | -/// |-----------------------------------------------| -/// | 08 | 09 | 10 | 11 | 12 | 13 | 14 | 15 | -/// | `9` | `A` | `B` | `C` | `D` | `E` | `F` | `G` | -/// |-----------------------------------------------| -/// | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -/// | `H` | `J` | `K` | `M` | `N` | `P` | `Q` | `R` | -/// |-----------------------------------------------| -/// | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -/// | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` | -/// ------------------------------------------------- - -/// Eg1: -/// 0x1A9BF0C00 -> index 4091296 -/// 4091296 = 3 * (32 ^ 4) + 28 * (32 ^ 3) + 27 * (32 ^ 2) + 13 * (32 ^ 1) + 0 * (32 ^ 0) -/// => (3), (28), (27), (13), (0) -/// => `4`, `W`, `V`, `E`, `1` -/// => "4WVE1" - -/// Eg2: -/// 0x4FEA13400 -> index 10399732 -/// 10399732 = 9 * (32 ^ 4) + 29 * (32 ^ 3) + 11 * (32 ^ 2) + 31 * (32 ^ 1) + 20 * (32 ^ 0) -/// => (9), (29), (11), (31), (20) -/// => `A`, `X`, `C`, `Z`, `N` -/// => "AXCZN" - -/// Compared with CommonCode, although ShortCode saves space, it completely -/// loses readability. The former can directly get the layout without the help -/// of a computer, while the latter is almost impossible to complete by the -/// human brain. +/// there are a total of 29334498 valid klotski cases, arrange them from small +/// to large by their CommonCodes (36-bit positive integers), and use the index +/// as the ShortCode. 
+ +/// Therefore, the valid value of ShortCode is [0, 29334498), stored in a +/// `uint32_t` variable. The goal of high compression ratio is to facilitate +/// verbal sharing, so it is necessary to represent it as a suitable string. +/// Similar to Bitcoin's base58 encoding, in ShortCode, 4 confusing characters +/// `0` `O` `I` `l` are removed from 10 digits and 26 letters, forming a +/// private base32 scheme. + +/// Coincidentally, log(32, 29334498) is approximately equal to 4.96, so using +/// 5-bit base32 can make good use of space, so any valid klotski case can be +/// represented by a 5-bit length code. As in CommonCode, the characters here +/// are case insensitive, but uppercase is still recommended. + +/// Compared with the CommonCode, although ShortCode saves space, it completely +/// loses readability. The former can directly get the case without the help of +/// a computer, while the latter is almost impossible to complete by the human +/// brain. But anyway, ShortCode makes it easy to manually record the klotski +/// cases, either verbally or handwritten. 
+ +/// ShortCode Convert Table /// +/// ------------------------------------------------- /// +/// | 00 | 01 | 02 | 03 | 04 | 05 | 06 | 07 | /// +/// | `1` | `2` | `3` | `4` | `5` | `6` | `7` | `8` | /// +/// |-----------------------------------------------| /// +/// | 08 | 09 | 10 | 11 | 12 | 13 | 14 | 15 | /// +/// | `9` | `A` | `B` | `C` | `D` | `E` | `F` | `G` | /// +/// |-----------------------------------------------| /// +/// | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | /// +/// | `H` | `J` | `K` | `M` | `N` | `P` | `Q` | `R` | /// +/// |-----------------------------------------------| /// +/// | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | /// +/// | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` | /// +/// ------------------------------------------------- /// + +/// ------------------------------------------------------------------------------------------ /// +/// Eg1: /// +/// % # # % /// +/// % # # % /// +/// @ $ $ @ CommonCode = 0x1A9BF0C00 (index 4091296) /// +/// @ & * @ /// +/// * & /// +/// /// +/// 4091296 => 3 * (32 ^ 4) + 28 * (32 ^ 3) + 27 * (32 ^ 2) + 13 * (32 ^ 1) + 0 * (32 ^ 0) /// +/// => (3), (28), (27), (13), (0) /// +/// => `4`, `W`, `V`, `E`, `1` /// +/// => "4WVE1" /// +/// ------------------------------------------------------------------------------------------ /// + +/// -------------------------------------------------------------------------------------------- /// +/// Eg2: /// +/// * @ & % /// +/// # # $ % /// +/// # # $ ^ CommonCode = 0x4FEA13400 (index 10399732) /// +/// ~ ~ ^ /// +/// @ % % /// +/// /// +/// 10399732 => 9 * (32 ^ 4) + 29 * (32 ^ 3) + 11 * (32 ^ 2) + 31 * (32 ^ 1) + 20 * (32 ^ 0) /// +/// => (9), (29), (11), (31), (20) /// +/// => `A`, `X`, `C`, `Z`, `N` /// +/// => "AXCZN" /// +/// -------------------------------------------------------------------------------------------- /// #include #include #include #include +#include "all_cases.h" namespace klotski { namespace codec { -const uint32_t SHORT_CODE_LIMIT = 29334498; 
+constexpr uint32_t SHORT_CODE_LIMIT = cases::ALL_CASES_NUM_; class CommonCode; class ShortCode { public: - static void speed_up(bool fast_mode); explicit operator uint32_t() const noexcept; static bool check(uint32_t short_code) noexcept; + static void speed_up(bool fast_mode = false) noexcept; friend std::ostream& operator<<(std::ostream &out, ShortCode self); [[nodiscard]] uint32_t unwrap() const noexcept; @@ -90,10 +106,11 @@ public: private: uint32_t code_; - static bool fast_available_; + static bool fast_available_; // TODO: try to remove it static uint64_t fast_decode(uint32_t short_code) noexcept; static uint32_t fast_encode(uint64_t common_code) noexcept; + static uint64_t tiny_decode(uint32_t short_code) noexcept; static uint32_t tiny_encode(uint64_t common_code) noexcept; @@ -101,7 +118,7 @@ private: static std::optional string_decode(const std::string &short_code) noexcept; }; -/// CommonCode compare implements. +/// ShortCode compare implements. inline bool operator==(uint32_t s1, ShortCode s2) noexcept { return s1 == s2.unwrap(); } @@ -133,12 +150,12 @@ inline ShortCode ShortCode::unsafe_create(uint32_t short_code) noexcept { return *reinterpret_cast(&short_code); // init directly } -/// CommonCode create with valid check. +/// ShortCode create with valid check. inline std::optional ShortCode::create(uint32_t short_code) noexcept { - if (ShortCode::check(short_code)) { - return ShortCode::unsafe_create(short_code); + if (!ShortCode::check(short_code)) { + return std::nullopt; } - return std::nullopt; + return ShortCode::unsafe_create(short_code); } /// Output string encoding of ShortCode.