docs: add more instructions of codec

2 years ago · 6b2910af81
3 changed files with 162 additions and 123 deletions
--- a/src/core/common_code/common_code.h
+++ b/src/core/common_code/common_code.h
@ -1,15 +1,13 @@
 #pragma once

 /// CommonCode is a generic klotski encoding that records an valid case using
-/// 36-bit lengths, and stored in a `uint64_t`.
-
-/// Since there is only one `2x2` block, it is encoded separately. Its upper
-/// left corner is called `head`, it has 12 possible positions and is encoded
-/// using 4-bit length (0 ~ 15).
+/// 36-bit lengths, and stored in a `uint64_t`. Since there is only one `2x2`
+/// block, it is encoded separately. Its upper-left corner is called `head`,
+/// which has 12 possible positions and is encoded using 4-bit length.
 ///
 ///   00 01 02 03
 ///   04 05 06 07    00 01 02
-///   08 09 10 11    04 05 06  <- head of 2x2 block
+///   08 09 10 11    04 05 06  <- head of 2x2 block (4-bit)
 ///   12 13 14 15    08 09 10     (without 03/07/11/15)
 ///   16 17 18 19    12 13 14

@ -22,36 +20,39 @@
 ///   (        # # )  |  (        # )  |                  |

 /// This sequence can have up to 16 blocks, aka 32-bit in length. Therefore, in
-/// order to be compatible with all cases, the length of this part of the code
-/// is set to 32-bit. In addition, for the convenience of reading, it is
-/// stipulated that the sequence starts from the high bit, and the remaining
+/// order to be compatible with all klotski cases, the length of this part of
+/// the code is set to 32-bit. In addition, for the convenience of reading, it
+/// is stipulated that the sequence starts from the high bit, and the remaining
 /// bits should be filled with `0`.

 /// Putting the content of the `head` in the upper 4-bit, and the lower 32-bit
 /// to store the sequence content, a 36-bit length code can be obtained, which
-/// corresponds to any valid layout one-to-one. When CommonCode is converted
-/// into a string, just directly export the hexadecimal data, and get a 9-bit
-/// string encoding. Characters are not case-sensitive, but it is recommended
-/// to use uppercase letters. In addition, the last `0` of the string is
-/// allowed to be omitted, and it can be completed to 9 digits when decoding,
-/// but note that if the encoding is all `0`, it should be reduced to the
-/// remaining one `0`.
-
-///   Eg1:
-///     % # # %    2x2 -> head = 1
-///     % # # %    2x1 2x1 2x1 1x2 2x1 1x1 1x1 1x1 space space 1x1 ... ... ... ... ...
-///     @ $ $ @     10  10  10  01  10  11  11  11    00    00  11  00  00  00  00  00
-///     @ & * @       1010    1001    1011    1111        0000    1100    0000    0000
-///     *     &          A       9       B       F           0       C       0       0
-///                CommonCode = 0x1A9BF0C00 -> "1A9BF0C"
-
-///   Eg2:
-///     * @ & %    2x2 -> head = 4
-///     # # $ %    1x1 1x1 1x1 2x1 2x1 2x1 space 1x2 space 1x1 1x2 ... ... ... ... ...
-///     # # $ ^     11  11  11  10  10  10    00  01    00  11  01  00  00  00  00  00
-///       ~ ~ ^       1111    1110    1010      0001      0011    0100    0000    0000
-///       @ % %          F       E       A         1         3       4       0       0
-///                CommonCode = 0x4FEA13400 -> "4FEA134"
+/// corresponds to any valid case one-to-one. When CommonCode is converted into
+/// a string, simply export the hexadecimal data to get a 9-digit string
+/// encoding. Characters are not case-sensitive, but it is recommended to use
+/// uppercase letters. In addition, the trailing `0`s of the string may be
+/// omitted, and it is padded back to 9 digits when decoding, but note that
+/// if the encoding is all `0`, it should be reduced to a single `0`.
+
+/// ----------------------------------------------------------------------------------- ///
+///   Eg1:                                                                              ///
+///     % # # %    2x2 -> head = 1                                                      ///
+///     % # # %    2x1 2x1 2x1 1x2 2x1 1x1 1x1 1x1 space space 1x1 ... ... ... ... ...  ///
+///     @ $ $ @     10  10  10  01  10  11  11  11    00    00  11  00  00  00  00  00  ///
+///     @ & * @       1010    1001    1011    1111        0000    1100    0000    0000  ///
+///     *     &          A       9       B       F           0       C       0       0  ///
+///   CommonCode = 0x1A9BF0C00 -> "1A9BF0C"                                             ///
+/// ----------------------------------------------------------------------------------- ///
+
+/// ----------------------------------------------------------------------------------- ///
+///   Eg2:                                                                              ///
+///     * @ & %    2x2 -> head = 4                                                      ///
+///     # # $ %    1x1 1x1 1x1 2x1 2x1 2x1 space 1x2 space 1x1 1x2 ... ... ... ... ...  ///
+///     # # $ ^     11  11  11  10  10  10    00  01    00  11  01  00  00  00  00  00  ///
+///       ~ ~ ^       1111    1110    1010      0001      0011    0100    0000    0000  ///
+///       @ % %          F       E       A         1         3       4       0       0  ///
+///   CommonCode = 0x4FEA13400 -> "4FEA134"                                             ///
+/// ----------------------------------------------------------------------------------- ///

 #include <string>
 #include <cstdint>
@ -134,10 +135,10 @@ inline CommonCode CommonCode::unsafe_create(uint64_t common_code) noexcept {

 /// CommonCode create with valid check.
 inline std::optional<CommonCode> CommonCode::create(uint64_t common_code) noexcept {
-    if (CommonCode::check(common_code)) {
-        return CommonCode::unsafe_create(common_code);
+    if (!CommonCode::check(common_code)) {
+        return std::nullopt;
    }
-    return std::nullopt;
+    return CommonCode::unsafe_create(common_code);
 }

 /// Output string encoding of CommonCode.
--- a/src/core/raw_code/raw_code.h
+++ b/src/core/raw_code/raw_code.h
@ -1,10 +1,10 @@
 #pragma once

-/// RawCode is an uncompressed coding scheme, which is used for program
-/// calculation. It encodes a `5x4` chessboard as 0 ~ 19, and uses 3-bit to
-/// represent each position, occupying a total of 60-bit, and stored in a
-/// `uint64_t` variable. Among them, the upper 4-bit are reserved and filled
-/// with `0`.
+/// RawCode is an uncompressed klotski coding scheme, which is used for program
+/// calculation. It encodes the `5x4` chessboard as 0 ~ 19, and uses 3-bit to
+/// represent each position, occupying a total of 60-bit, which is stored in a
+/// `uint64_t` variable. In addition, the upper 4-bit of RawCode are reserved
+/// and must be filled with `0`.
 ///
 ///   00 01 02 03
 ///   04 05 06 07    fill   20 slots
@ -12,31 +12,52 @@
 ///   12 13 14 15    (4b) + (3b) * 20 => 64-bit
 ///   16 17 18 19

-///   Eg1:
-///     % # # %    2x1 2x2 ... 2x1    010 100 111 010
-///     % # # %    ... ... ... ...    111 111 111 111
-///     @ $ $ @    2x1 1x2 ... 2x1    010 001 111 010
-///     @ & * @    ... 1x1 1x1 ...    111 011 011 111
-///     *     &    1x1 0x0 0x0 1x1    011 000 000 011
+/// As we all know, 3-bit can represent 8 states. The upper-left corners of the
+/// four block types correspond to 4 of them, and 2 more states are needed to
+/// mark spaces and fills. The remaining 2 states are reserved for now.
 ///
-///          |  19  18  17  16 |  15  14  13  12 |  11  10  09  08 |  07  06  05  04 |  03  02  01  00
-///     0000 | 011 000 000 011 | 111 011 011 111 | 010 111 001 010 | 111 111 111 111 | 010 111 100 010
-///     0000 |  0110 0000 0011 |  1110 1101 1111 |  0101 1100 1010 |  1111 1111 1111 |  0101 1110 0010
-///        0       6    0    3       E    D    F       5    C    A       F    F    F       5    E    2
-///     => 0x0603'EDF5'CAFF'F5E2
-
-///   Eg2:
-///     * @ & %    1x1 1x1 1x1 2x1    011 011 011 010
-///     # # $ %    2x2 ... 2x1 ...    100 111 010 111
-///     # # $ ^    ... ... ... 2x1    111 111 111 010
-///       ~ ~ ^    0x0 1x2 ... ...    000 001 111 111
-///       @ % %    0x0 1x1 1x2 ...    000 011 001 111
+///   ------------------------------------
+///   | 000 -> space | 100 -> 2x2        |
+///   | 001 -> 1x2   | 101 -> [reserved] |
+///   | 010 -> 2x1   | 110 -> [reserved] |
+///   | 011 -> 1x1   | 111 -> fill       |
+///   ------------------------------------
 ///
-///          |  19  18  17  16 |  15  14  13  12 |  11  10  09  08 |  07  06  05  04 |  03  02  01  00
-///     0000 | 111 001 011 000 | 111 111 001 000 | 010 111 111 111 | 111 010 111 100 | 010 011 011 011
-///     0000 |  1110 0101 1000 |  1111 1100 1000 |  0101 1111 1111 |  1110 1011 1100 |  0100 1101 1011
-///        0       E    5    8       F    C    8       5    F    F       E    B    C       4    D    B
-///     => 0x0E58'FC85'FFEB'C4DB
+/// Here, space is defined as `000` and fill is defined as `111`, which will
+/// facilitate the execution of bit operations of the movement algorithm. Other
+/// block definitions will not affect the efficiency of the algorithm.
+
+/// -------------------------------------------------------------------------------------------------- ///
+///   Eg1:                                                                                             ///
+///     % # # %    2x1 2x2 ... 2x1    010 100 111 010                                                  ///
+///     % # # %    ... ... ... ...    111 111 111 111                                                  ///
+///     @ $ $ @    2x1 1x2 ... 2x1    010 001 111 010                                                  ///
+///     @ & * @    ... 1x1 1x1 ...    111 011 011 111                                                  ///
+///     *     &    1x1 0x0 0x0 1x1    011 000 000 011                                                  ///
+///                                                                                                    ///
+///        |  19  18  17  16 |  15  14  13  12 |  11  10  09  08 |  07  06  05  04 |  03  02  01  00   ///
+///   0000 | 011 000 000 011 | 111 011 011 111 | 010 111 001 010 | 111 111 111 111 | 010 111 100 010   ///
+///   0000 |  0110 0000 0011 |  1110 1101 1111 |  0101 1100 1010 |  1111 1111 1111 |  0101 1110 0010   ///
+///      0       6    0    3       E    D    F       5    C    A       F    F    F       5    E    2   ///
+///                                                                                                    ///
+///   RawCode => 0x0603'EDF5'CAFF'F5E2                                                                 ///
+/// -------------------------------------------------------------------------------------------------- ///
+
+/// -------------------------------------------------------------------------------------------------- ///
+///   Eg2:                                                                                             ///
+///     * @ & %    1x1 1x1 1x1 2x1    011 011 011 010                                                  ///
+///     # # $ %    2x2 ... 2x1 ...    100 111 010 111                                                  ///
+///     # # $ ^    ... ... ... 2x1    111 111 111 010                                                  ///
+///       ~ ~ ^    0x0 1x2 ... ...    000 001 111 111                                                  ///
+///       @ % %    0x0 1x1 1x2 ...    000 011 001 111                                                  ///
+///                                                                                                    ///
+///        |  19  18  17  16 |  15  14  13  12 |  11  10  09  08 |  07  06  05  04 |  03  02  01  00   ///
+///   0000 | 111 001 011 000 | 111 111 001 000 | 010 111 111 111 | 111 010 111 100 | 010 011 011 011   ///
+///   0000 |  1110 0101 1000 |  1111 1100 1000 |  0101 1111 1111 |  1110 1011 1100 |  0100 1101 1011   ///
+///      0       E    5    8       F    C    8       5    F    F       E    B    C       4    D    B   ///
+///                                                                                                    ///
+///   RawCode => 0x0E58'FC85'FFEB'C4DB                                                                 ///
+/// -------------------------------------------------------------------------------------------------- ///

 #include <string>
 #include <ostream>
@ -116,10 +137,10 @@ inline RawCode RawCode::unsafe_create(uint64_t raw_code) noexcept {

 /// RawCode create with valid check.
 inline std::optional<RawCode> RawCode::create(uint64_t raw_code) noexcept {
-    if (RawCode::check(raw_code)) {
-        return RawCode::unsafe_create(raw_code);
+    if (!RawCode::check(raw_code)) {
+        return std::nullopt;
    }
-    return std::nullopt;
+    return RawCode::unsafe_create(raw_code);
 }

 } // namespace codec
--- a/src/core/short_code/short_code.h
+++ b/src/core/short_code/short_code.h
@ -1,72 +1,88 @@
 #pragma once

 /// ShortCode is a high-compression encoding scheme based on CommonCode. Since
-/// there are a total of 29334498 valid klotski layouts, arrange their
-/// CommonCodes from small to large (36-bit positive integers), and use the
-/// index as the ShortCode.
-
-/// Therefore, the valid value of ShortCode is [0, 29334498), stored in
-/// `uint32_t`. The goal of high compression ratio is to facilitate verbal
-/// sharing, so it is necessary to represent it in a suitable string. Similar
-/// to Bitcoin's `base58`, in ShortCode, 4 confusing characters `0` `O` `I` `l`
-/// are removed from 10 numbers and 26 characters, forming a private base32
-/// scheme.
-
-/// Coincidentally, log(32, 29334498) is approximately equal to `4.96`, so
-/// using 5-bit base32 can make good use of space, so any valid klotski layout
-/// can be represented by a 5-bit length code. As in CommonCode, the characters
-/// here are case insensitive, but uppercase is still recommended.
-
-///                ShortCode Convert Table
-///   -------------------------------------------------
-///   |  00 |  01 |  02 |  03 |  04 |  05 |  06 |  07 |
-///   | `1` | `2` | `3` | `4` | `5` | `6` | `7` | `8` |
-///   |-----------------------------------------------|
-///   |  08 |  09 |  10 |  11 |  12 |  13 |  14 |  15 |
-///   | `9` | `A` | `B` | `C` | `D` | `E` | `F` | `G` |
-///   |-----------------------------------------------|
-///   |  16 |  17 |  18 |  19 |  20 |  21 |  22 |  23 |
-///   | `H` | `J` | `K` | `M` | `N` | `P` | `Q` | `R` |
-///   |-----------------------------------------------|
-///   |  24 |  25 |  26 |  27 |  28 |  29 |  30 |  31 |
-///   | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` |
-///   -------------------------------------------------
-
-///   Eg1:
-///     0x1A9BF0C00 -> index 4091296
-///     4091296 = 3 * (32 ^ 4) + 28 * (32 ^ 3) + 27 * (32 ^ 2) + 13 * (32 ^ 1) + 0 * (32 ^ 0)
-///       => (3), (28), (27), (13), (0)
-///       => `4`, `W`, `V`, `E`, `1`
-///       => "4WVE1"
-
-///   Eg2:
-///     0x4FEA13400 -> index 10399732
-///     10399732 = 9 * (32 ^ 4) + 29 * (32 ^ 3) + 11 * (32 ^ 2) + 31 * (32 ^ 1) + 20 * (32 ^ 0)
-///       => (9), (29), (11), (31), (20)
-///       => `A`, `X`, `C`, `Z`, `N`
-///       => "AXCZN"
-
-/// Compared with CommonCode, although ShortCode saves space, it completely
-/// loses readability. The former can directly get the layout without the help
-/// of a computer, while the latter is almost impossible to complete by the
-/// human brain.
+/// there are a total of 29334498 valid klotski cases, arrange them from small
+/// to large by their CommonCodes (36-bit positive integers), and use the index
+/// as the ShortCode.
+
+/// Therefore, the valid range of ShortCode is [0, 29334498), which is stored in
+/// a `uint32_t` variable. The goal of high compression ratio is to facilitate
+/// verbal sharing, so it is necessary to represent it as a suitable string.
+/// Similar to Bitcoin's base58 encoding, in ShortCode, 4 confusing characters
+/// `0` `O` `I` `l` are removed from 10 digits and 26 letters, forming a
+/// private base32 scheme.
+
+/// Coincidentally, log(32, 29334498) is approximately equal to 4.96, so using
+/// 5-digit base32 can make good use of space, so any valid klotski case can be
+/// represented by a 5-character code. As in CommonCode, the characters here
+/// are case insensitive, but uppercase is still recommended.
+
+/// Compared with the CommonCode, although ShortCode saves space, it completely
+/// loses readability. The former can directly get the case without the help of
+/// a computer, while the latter is almost impossible to complete by the human
+/// brain. But anyway, ShortCode makes it easy to manually record the klotski
+/// cases, either verbally or in handwriting.
+
+///                ShortCode Convert Table                ///
+///   -------------------------------------------------   ///
+///   |  00 |  01 |  02 |  03 |  04 |  05 |  06 |  07 |   ///
+///   | `1` | `2` | `3` | `4` | `5` | `6` | `7` | `8` |   ///
+///   |-----------------------------------------------|   ///
+///   |  08 |  09 |  10 |  11 |  12 |  13 |  14 |  15 |   ///
+///   | `9` | `A` | `B` | `C` | `D` | `E` | `F` | `G` |   ///
+///   |-----------------------------------------------|   ///
+///   |  16 |  17 |  18 |  19 |  20 |  21 |  22 |  23 |   ///
+///   | `H` | `J` | `K` | `M` | `N` | `P` | `Q` | `R` |   ///
+///   |-----------------------------------------------|   ///
+///   |  24 |  25 |  26 |  27 |  28 |  29 |  30 |  31 |   ///
+///   | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` |   ///
+///   -------------------------------------------------   ///
+
+/// ------------------------------------------------------------------------------------------ ///
+///   Eg1:                                                                                     ///
+///     % # # %                                                                                ///
+///     % # # %                                                                                ///
+///     @ $ $ @    CommonCode = 0x1A9BF0C00 (index 4091296)                                    ///
+///     @ & * @                                                                                ///
+///     *     &                                                                                ///
+///                                                                                            ///
+///   4091296 => 3 * (32 ^ 4) + 28 * (32 ^ 3) + 27 * (32 ^ 2) + 13 * (32 ^ 1) + 0 * (32 ^ 0)   ///
+///           => (3), (28), (27), (13), (0)                                                    ///
+///           => `4`, `W`, `V`, `E`, `1`                                                       ///
+///           => "4WVE1"                                                                       ///
+/// ------------------------------------------------------------------------------------------ ///
+
+/// -------------------------------------------------------------------------------------------- ///
+///   Eg2:                                                                                       ///
+///     * @ & %                                                                                  ///
+///     # # $ %                                                                                  ///
+///     # # $ ^    CommonCode = 0x4FEA13400 (index 10399732)                                     ///
+///       ~ ~ ^                                                                                  ///
+///       @ % %                                                                                  ///
+///                                                                                              ///
+///   10399732 => 9 * (32 ^ 4) + 29 * (32 ^ 3) + 11 * (32 ^ 2) + 31 * (32 ^ 1) + 20 * (32 ^ 0)   ///
+///            => (9), (29), (11), (31), (20)                                                    ///
+///            => `A`, `X`, `C`, `Z`, `N`                                                        ///
+///            => "AXCZN"                                                                        ///
+/// -------------------------------------------------------------------------------------------- ///

 #include <string>
 #include <cstdint>
 #include <ostream>
 #include <optional>
+#include "all_cases.h"

 namespace klotski {
 namespace codec {

-const uint32_t SHORT_CODE_LIMIT = 29334498;
+constexpr uint32_t SHORT_CODE_LIMIT = cases::ALL_CASES_NUM_;

 class CommonCode;
 class ShortCode {
 public:
-    static void speed_up(bool fast_mode);
    explicit operator uint32_t() const noexcept;
    static bool check(uint32_t short_code) noexcept;
+    static void speed_up(bool fast_mode = false) noexcept;
    friend std::ostream& operator<<(std::ostream &out, ShortCode self);

    [[nodiscard]] uint32_t unwrap() const noexcept;
@ -90,10 +106,11 @@ public:

 private:
    uint32_t code_;
-    static bool fast_available_;
+    static bool fast_available_; // TODO: try to remove it

    static uint64_t fast_decode(uint32_t short_code) noexcept;
    static uint32_t fast_encode(uint64_t common_code) noexcept;
+
    static uint64_t tiny_decode(uint32_t short_code) noexcept;
    static uint32_t tiny_encode(uint64_t common_code) noexcept;

@ -101,7 +118,7 @@ private:
    static std::optional<uint32_t> string_decode(const std::string &short_code) noexcept;
 };

-/// CommonCode compare implements.
+/// ShortCode compare implements.
 inline bool operator==(uint32_t s1, ShortCode s2) noexcept {
    return s1 == s2.unwrap();
 }
@ -133,12 +150,12 @@ inline ShortCode ShortCode::unsafe_create(uint32_t short_code) noexcept {
    return *reinterpret_cast<ShortCode*>(&short_code); // init directly
 }

-/// CommonCode create with valid check.
+/// ShortCode create with valid check.
 inline std::optional<ShortCode> ShortCode::create(uint32_t short_code) noexcept {
-    if (ShortCode::check(short_code)) {
-        return ShortCode::unsafe_create(short_code);
+    if (!ShortCode::check(short_code)) {
+        return std::nullopt;
    }
-    return std::nullopt;
+    return ShortCode::unsafe_create(short_code);
 }

 /// Output string encoding of ShortCode.