Browse Source

docs: add more instructions of codec

legacy
Dnomd343 1 year ago
parent
commit
6b2910af81
  1. 71
      src/core/common_code/common_code.h
  2. 83
      src/core/raw_code/raw_code.h
  3. 131
      src/core/short_code/short_code.h

71
src/core/common_code/common_code.h

@ -1,15 +1,13 @@
#pragma once
/// CommonCode is a generic klotski encoding that records a valid case using
/// 36-bit lengths, and stored in a `uint64_t`.
/// Since there is only one `2x2` block, it is encoded separately. Its upper
/// left corner is called `head`, it has 12 possible positions and is encoded
/// using 4-bit length (0 ~ 15).
/// a 36-bit length, and is stored in a `uint64_t`. Since there is only one
/// `2x2` block, it is encoded separately. Its upper-left corner is called
/// `head`, which has 12 possible positions and is encoded using 4 bits.
///
/// 00 01 02 03
/// 04 05 06 07 00 01 02
/// 08 09 10 11 04 05 06 <- head of 2x2 block
/// 08 09 10 11 04 05 06 <- head of 2x2 block (4-bit)
/// 12 13 14 15 08 09 10 (without 03/07/11/15)
/// 16 17 18 19 12 13 14
@ -22,36 +20,39 @@
/// ( # # ) | ( # ) | |
/// This sequence can have up to 16 blocks, aka 32-bit in length. Therefore, in
/// order to be compatible with all cases, the length of this part of the code
/// is set to 32-bit. In addition, for the convenience of reading, it is
/// stipulated that the sequence starts from the high bit, and the remaining
/// order to be compatible with all klotski cases, the length of this part of
/// the code is set to 32-bit. In addition, for the convenience of reading, it
/// is stipulated that the sequence starts from the high bit, and the remaining
/// bits should be filled with `0`.
/// Putting the content of the `head` in the upper 4-bit, and the lower 32-bit
/// to store the sequence content, a 36-bit length code can be obtained, which
/// corresponds to any valid layout one-to-one. When CommonCode is converted
/// into a string, just directly export the hexadecimal data, and get a 9-bit
/// string encoding. Characters are not case-sensitive, but it is recommended
/// to use uppercase letters. In addition, the last `0` of the string is
/// allowed to be omitted, and it can be completed to 9 digits when decoding,
/// but note that if the encoding is all `0`, it should be reduced to the
/// remaining one `0`.
/// Eg1:
/// % # # % 2x2 -> head = 1
/// % # # % 2x1 2x1 2x1 1x2 2x1 1x1 1x1 1x1 space space 1x1 ... ... ... ... ...
/// @ $ $ @ 10 10 10 01 10 11 11 11 00 00 11 00 00 00 00 00
/// @ & * @ 1010 1001 1011 1111 0000 1100 0000 0000
/// * & A 9 B F 0 C 0 0
/// CommonCode = 0x1A9BF0C00 -> "1A9BF0C"
/// Eg2:
/// * @ & % 2x2 -> head = 4
/// # # $ % 1x1 1x1 1x1 2x1 2x1 2x1 space 1x2 space 1x1 1x2 ... ... ... ... ...
/// # # $ ^ 11 11 11 10 10 10 00 01 00 11 01 00 00 00 00 00
/// ~ ~ ^ 1111 1110 1010 0001 0011 0100 0000 0000
/// @ % % F E A 1 3 4 0 0
/// CommonCode = 0x4FEA13400 -> "4FEA134"
/// corresponds to any valid case one-to-one. When CommonCode is converted into
/// a string, just directly export the hexadecimal data to get a 9-character
/// string encoding. Characters are not case-sensitive, but it is recommended
/// to use uppercase letters. In addition, trailing `0` characters of the
/// string are allowed to be omitted, and they are restored to 9 digits when
/// decoding, but note that if the encoding is all `0`, it should be reduced
/// to a single `0`.
/// ----------------------------------------------------------------------------------- ///
/// Eg1: ///
/// % # # % 2x2 -> head = 1 ///
/// % # # % 2x1 2x1 2x1 1x2 2x1 1x1 1x1 1x1 space space 1x1 ... ... ... ... ... ///
/// @ $ $ @ 10 10 10 01 10 11 11 11 00 00 11 00 00 00 00 00 ///
/// @ & * @ 1010 1001 1011 1111 0000 1100 0000 0000 ///
/// * & A 9 B F 0 C 0 0 ///
/// CommonCode = 0x1A9BF0C00 -> "1A9BF0C" ///
/// ----------------------------------------------------------------------------------- ///
/// ----------------------------------------------------------------------------------- ///
/// Eg2: ///
/// * @ & % 2x2 -> head = 4 ///
/// # # $ % 1x1 1x1 1x1 2x1 2x1 2x1 space 1x2 space 1x1 1x2 ... ... ... ... ... ///
/// # # $ ^ 11 11 11 10 10 10 00 01 00 11 01 00 00 00 00 00 ///
/// ~ ~ ^ 1111 1110 1010 0001 0011 0100 0000 0000 ///
/// @ % % F E A 1 3 4 0 0 ///
/// CommonCode = 0x4FEA13400 -> "4FEA134" ///
/// ----------------------------------------------------------------------------------- ///
#include <string>
#include <cstdint>
@ -134,11 +135,11 @@ inline CommonCode CommonCode::unsafe_create(uint64_t common_code) noexcept {
/// Create a CommonCode from a raw 64-bit value, with validity check.
/// Returns `std::nullopt` when `CommonCode::check` rejects `common_code`;
/// otherwise returns the object built by `CommonCode::unsafe_create`.
inline std::optional<CommonCode> CommonCode::create(uint64_t common_code) noexcept {
    // Single guard: the value is checked exactly once and rejected early.
    if (!CommonCode::check(common_code)) {
        return std::nullopt;
    }
    return CommonCode::unsafe_create(common_code);
}
/// Output string encoding of CommonCode.
inline std::ostream& operator<<(std::ostream &out, CommonCode self) {

83
src/core/raw_code/raw_code.h

@ -1,10 +1,10 @@
#pragma once
/// RawCode is an uncompressed coding scheme, which is used for program
/// calculation. It encodes a `5x4` chessboard as 0 ~ 19, and uses 3-bit to
/// represent each position, occupying a total of 60-bit, and stored in a
/// `uint64_t` variable. Among them, the upper 4-bit are reserved and filled
/// with `0`.
/// RawCode is an uncompressed klotski coding scheme, which is used for program
/// calculation. It numbers the positions of the `5x4` chessboard as 0 ~ 19 and
/// uses 3 bits to represent each position, occupying a total of 60 bits, which
/// are stored in a `uint64_t` variable. In addition, the upper 4 bits of
/// RawCode are reserved and must be filled with `0`.
///
/// 00 01 02 03
/// 04 05 06 07 fill 20 slots
@ -12,31 +12,52 @@
/// 12 13 14 15 (4b) + (3b) * 20 => 64-bit
/// 16 17 18 19
/// Eg1:
/// % # # % 2x1 2x2 ... 2x1 010 100 111 010
/// % # # % ... ... ... ... 111 111 111 111
/// @ $ $ @ 2x1 1x2 ... 2x1 010 001 111 010
/// @ & * @ ... 1x1 1x1 ... 111 011 011 111
/// * & 1x1 0x0 0x0 1x1 011 000 000 011
/// As we all know, 3 bits can represent 8 states. The upper-left corners of
/// the four block types correspond to 4 of them, and 2 more states are needed
/// to mark spaces and fills. The remaining 2 states are reserved for now.
///
/// | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00
/// 0000 | 011 000 000 011 | 111 011 011 111 | 010 111 001 010 | 111 111 111 111 | 010 111 100 010
/// 0000 | 0110 0000 0011 | 1110 1101 1111 | 0101 1100 1010 | 1111 1111 1111 | 0101 1110 0010
/// 0 6 0 3 E D F 5 C A F F F 5 E 2
/// => 0x0603'EDF5'CAFF'F5E2
/// Eg2:
/// * @ & % 1x1 1x1 1x1 2x1 011 011 011 010
/// # # $ % 2x2 ... 2x1 ... 100 111 010 111
/// # # $ ^ ... ... ... 2x1 111 111 111 010
/// ~ ~ ^ 0x0 1x2 ... ... 000 001 111 111
/// @ % % 0x0 1x1 1x2 ... 000 011 001 111
/// ------------------------------------
/// | 000 -> space | 100 -> 2x2 |
/// | 001 -> 1x2 | 101 -> [reserved] |
/// | 010 -> 2x1 | 110 -> [reserved] |
/// | 011 -> 1x1 | 111 -> fill |
/// ------------------------------------
///
/// | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00
/// 0000 | 111 001 011 000 | 111 111 001 000 | 010 111 111 111 | 111 010 111 100 | 010 011 011 011
/// 0000 | 1110 0101 1000 | 1111 1100 1000 | 0101 1111 1111 | 1110 1011 1100 | 0100 1101 1011
/// 0 E 5 8 F C 8 5 F F E B C 4 D B
/// => 0x0E58'FC85'FFEB'C4DB
/// Here, space is defined as `000` and fill is defined as `111`, which will
/// facilitate the execution of bit operations of the movement algorithm. Other
/// block definitions will not affect the efficiency of the algorithm.
/// -------------------------------------------------------------------------------------------------- ///
/// Eg1: ///
/// % # # % 2x1 2x2 ... 2x1 010 100 111 010 ///
/// % # # % ... ... ... ... 111 111 111 111 ///
/// @ $ $ @ 2x1 1x2 ... 2x1 010 001 111 010 ///
/// @ & * @ ... 1x1 1x1 ... 111 011 011 111 ///
/// * & 1x1 0x0 0x0 1x1 011 000 000 011 ///
/// ///
/// | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 ///
/// 0000 | 011 000 000 011 | 111 011 011 111 | 010 111 001 010 | 111 111 111 111 | 010 111 100 010 ///
/// 0000 | 0110 0000 0011 | 1110 1101 1111 | 0101 1100 1010 | 1111 1111 1111 | 0101 1110 0010 ///
/// 0 6 0 3 E D F 5 C A F F F 5 E 2 ///
/// ///
/// RawCode => 0x0603'EDF5'CAFF'F5E2 ///
/// -------------------------------------------------------------------------------------------------- ///
/// -------------------------------------------------------------------------------------------------- ///
/// Eg2: ///
/// * @ & % 1x1 1x1 1x1 2x1 011 011 011 010 ///
/// # # $ % 2x2 ... 2x1 ... 100 111 010 111 ///
/// # # $ ^ ... ... ... 2x1 111 111 111 010 ///
/// ~ ~ ^ 0x0 1x2 ... ... 000 001 111 111 ///
/// @ % % 0x0 1x1 1x2 ... 000 011 001 111 ///
/// ///
/// | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 ///
/// 0000 | 111 001 011 000 | 111 111 001 000 | 010 111 111 111 | 111 010 111 100 | 010 011 011 011 ///
/// 0000 | 1110 0101 1000 | 1111 1100 1000 | 0101 1111 1111 | 1110 1011 1100 | 0100 1101 1011 ///
/// 0 E 5 8 F C 8 5 F F E B C 4 D B ///
/// ///
/// RawCode => 0x0E58'FC85'FFEB'C4DB ///
/// -------------------------------------------------------------------------------------------------- ///
#include <string>
#include <ostream>
@ -116,11 +137,11 @@ inline RawCode RawCode::unsafe_create(uint64_t raw_code) noexcept {
/// Create a RawCode from a raw 64-bit value, with validity check.
/// Returns `std::nullopt` when `RawCode::check` rejects `raw_code`;
/// otherwise returns the object built by `RawCode::unsafe_create`.
inline std::optional<RawCode> RawCode::create(uint64_t raw_code) noexcept {
    // Single guard: the value is checked exactly once and rejected early.
    if (!RawCode::check(raw_code)) {
        return std::nullopt;
    }
    return RawCode::unsafe_create(raw_code);
}
} // namespace codec
} // namespace klotski

131
src/core/short_code/short_code.h

@ -1,72 +1,88 @@
#pragma once
/// ShortCode is a high-compression encoding scheme based on CommonCode. Since
/// there are a total of 29334498 valid klotski layouts, arrange their
/// CommonCodes from small to large (36-bit positive integers), and use the
/// index as the ShortCode.
/// Therefore, the valid value of ShortCode is [0, 29334498), stored in
/// `uint32_t`. The goal of high compression ratio is to facilitate verbal
/// sharing, so it is necessary to represent it in a suitable string. Similar
/// to Bitcoin's `base58`, in ShortCode, 4 confusing characters `0` `O` `I` `l`
/// are removed from 10 numbers and 26 characters, forming a private base32
/// scheme.
/// Coincidentally, log(32, 29334498) is approximately equal to `4.96`, so
/// using 5-bit base32 can make good use of space, so any valid klotski layout
/// can be represented by a 5-bit length code. As in CommonCode, the characters
/// here are case insensitive, but uppercase is still recommended.
/// ShortCode Convert Table
/// -------------------------------------------------
/// | 00 | 01 | 02 | 03 | 04 | 05 | 06 | 07 |
/// | `1` | `2` | `3` | `4` | `5` | `6` | `7` | `8` |
/// |-----------------------------------------------|
/// | 08 | 09 | 10 | 11 | 12 | 13 | 14 | 15 |
/// | `9` | `A` | `B` | `C` | `D` | `E` | `F` | `G` |
/// |-----------------------------------------------|
/// | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
/// | `H` | `J` | `K` | `M` | `N` | `P` | `Q` | `R` |
/// |-----------------------------------------------|
/// | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
/// | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` |
/// -------------------------------------------------
/// Eg1:
/// 0x1A9BF0C00 -> index 4091296
/// 4091296 = 3 * (32 ^ 4) + 28 * (32 ^ 3) + 27 * (32 ^ 2) + 13 * (32 ^ 1) + 0 * (32 ^ 0)
/// => (3), (28), (27), (13), (0)
/// => `4`, `W`, `V`, `E`, `1`
/// => "4WVE1"
/// Eg2:
/// 0x4FEA13400 -> index 10399732
/// 10399732 = 9 * (32 ^ 4) + 29 * (32 ^ 3) + 11 * (32 ^ 2) + 31 * (32 ^ 1) + 20 * (32 ^ 0)
/// => (9), (29), (11), (31), (20)
/// => `A`, `X`, `C`, `Z`, `N`
/// => "AXCZN"
/// Compared with CommonCode, although ShortCode saves space, it completely
/// loses readability. The former can directly get the layout without the help
/// of a computer, while the latter is almost impossible to complete by the
/// human brain.
/// there are a total of 29334498 valid klotski cases, arrange them from small
/// to large by their CommonCodes (36-bit positive integers), and use the index
/// as the ShortCode.
/// Therefore, the valid value of ShortCode is [0, 29334498), which is stored
/// in a `uint32_t` variable. The goal of the high compression ratio is to
/// facilitate verbal sharing, so it is necessary to represent it as a suitable
/// string. Similar to Bitcoin's base58 encoding, in ShortCode, 4 confusing
/// characters `0` `O` `I` `l` are removed from the 10 digits and 26 letters,
/// forming a private base32 scheme.
/// Coincidentally, log(32, 29334498) is approximately equal to 4.96, so using
/// 5 base32 characters makes good use of space, and any valid klotski case can
/// be represented by a 5-character code. As in CommonCode, the characters here
/// are case insensitive, but uppercase is still recommended.
/// Compared with CommonCode, although ShortCode saves space, it completely
/// loses readability. The former can be decoded into the case without the help
/// of a computer, while the latter is almost impossible to decode with the
/// human brain. But anyway, ShortCode makes it easy to manually record klotski
/// cases, either verbally or in handwriting.
/// ShortCode Convert Table ///
/// ------------------------------------------------- ///
/// | 00 | 01 | 02 | 03 | 04 | 05 | 06 | 07 | ///
/// | `1` | `2` | `3` | `4` | `5` | `6` | `7` | `8` | ///
/// |-----------------------------------------------| ///
/// | 08 | 09 | 10 | 11 | 12 | 13 | 14 | 15 | ///
/// | `9` | `A` | `B` | `C` | `D` | `E` | `F` | `G` | ///
/// |-----------------------------------------------| ///
/// | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | ///
/// | `H` | `J` | `K` | `M` | `N` | `P` | `Q` | `R` | ///
/// |-----------------------------------------------| ///
/// | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | ///
/// | `S` | `T` | `U` | `V` | `W` | `X` | `Y` | `Z` | ///
/// ------------------------------------------------- ///
/// ------------------------------------------------------------------------------------------ ///
/// Eg1: ///
/// % # # % ///
/// % # # % ///
/// @ $ $ @ CommonCode = 0x1A9BF0C00 (index 4091296) ///
/// @ & * @ ///
/// * & ///
/// ///
/// 4091296 => 3 * (32 ^ 4) + 28 * (32 ^ 3) + 27 * (32 ^ 2) + 13 * (32 ^ 1) + 0 * (32 ^ 0) ///
/// => (3), (28), (27), (13), (0) ///
/// => `4`, `W`, `V`, `E`, `1` ///
/// => "4WVE1" ///
/// ------------------------------------------------------------------------------------------ ///
/// -------------------------------------------------------------------------------------------- ///
/// Eg2: ///
/// * @ & % ///
/// # # $ % ///
/// # # $ ^ CommonCode = 0x4FEA13400 (index 10399732) ///
/// ~ ~ ^ ///
/// @ % % ///
/// ///
/// 10399732 => 9 * (32 ^ 4) + 29 * (32 ^ 3) + 11 * (32 ^ 2) + 31 * (32 ^ 1) + 20 * (32 ^ 0) ///
/// => (9), (29), (11), (31), (20) ///
/// => `A`, `X`, `C`, `Z`, `N` ///
/// => "AXCZN" ///
/// -------------------------------------------------------------------------------------------- ///
#include <string>
#include <cstdint>
#include <ostream>
#include <optional>
#include "all_cases.h"
namespace klotski {
namespace codec {
/// Exclusive upper bound of valid ShortCode values (the number of valid
/// klotski cases). Defined once, from the all-cases constant, instead of
/// duplicating the literal.
/// NOTE(review): confirm `cases::ALL_CASES_NUM_` equals the former literal 29334498.
constexpr uint32_t SHORT_CODE_LIMIT = cases::ALL_CASES_NUM_;
class CommonCode;
class ShortCode {
public:
static void speed_up(bool fast_mode);
explicit operator uint32_t() const noexcept;
static bool check(uint32_t short_code) noexcept;
static void speed_up(bool fast_mode = false) noexcept;
friend std::ostream& operator<<(std::ostream &out, ShortCode self);
[[nodiscard]] uint32_t unwrap() const noexcept;
@ -90,10 +106,11 @@ public:
private:
uint32_t code_;
static bool fast_available_;
static bool fast_available_; // TODO: try to remove it
static uint64_t fast_decode(uint32_t short_code) noexcept;
static uint32_t fast_encode(uint64_t common_code) noexcept;
static uint64_t tiny_decode(uint32_t short_code) noexcept;
static uint32_t tiny_encode(uint64_t common_code) noexcept;
@ -101,7 +118,7 @@ private:
static std::optional<uint32_t> string_decode(const std::string &short_code) noexcept;
};
/// ShortCode comparison: equality between a plain `uint32_t` and a ShortCode,
/// decided by comparing `s1` with the value returned by `s2.unwrap()`.
inline bool operator==(uint32_t s1, ShortCode s2) noexcept {
    const uint32_t unwrapped = s2.unwrap();
    return unwrapped == s1;
}
@ -133,13 +150,13 @@ inline ShortCode ShortCode::unsafe_create(uint32_t short_code) noexcept {
return *reinterpret_cast<ShortCode*>(&short_code); // init directly
}
/// Create a ShortCode from a raw 32-bit value, with validity check.
/// Returns `std::nullopt` when `ShortCode::check` rejects `short_code`;
/// otherwise returns the object built by `ShortCode::unsafe_create`.
inline std::optional<ShortCode> ShortCode::create(uint32_t short_code) noexcept {
    // Single guard: the value is checked exactly once and rejected early.
    if (!ShortCode::check(short_code)) {
        return std::nullopt;
    }
    return ShortCode::unsafe_create(short_code);
}
/// Output string encoding of ShortCode.
inline std::ostream& operator<<(std::ostream &out, ShortCode self) {

Loading…
Cancel
Save