From 7cbfd9badd684211d5ad4a9475ee6dcec9e22915 Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Mon, 16 Jan 2023 20:00:01 +0800 Subject: [PATCH] update: interface of ShortCode --- src/main.cc | 25 ++++--- src/short_code/CMakeLists.txt | 2 +- src/short_code/convert.cc | 16 +++-- src/short_code/data_loader.cc | 70 ------------------- src/short_code/serialize.cc | 44 ++++++++++++ .../{short_code_chars.h => serialize_chars.h} | 6 +- src/short_code/short_code.cc | 61 ++++++++-------- src/short_code/short_code.h | 54 ++++---------- 8 files changed, 117 insertions(+), 161 deletions(-) delete mode 100644 src/short_code/data_loader.cc create mode 100644 src/short_code/serialize.cc rename src/short_code/{short_code_chars.h => serialize_chars.h} (84%) diff --git a/src/main.cc b/src/main.cc index 58b8418..8d4b820 100644 --- a/src/main.cc +++ b/src/main.cc @@ -178,13 +178,13 @@ int main() { // AllCases::build(); BasicRanges::build(); - std::vector all_cases; - for (uint64_t head = 0; head < 16; ++head) { - for (const auto &range : AllCases::fetch()[head]) { - all_cases.emplace_back(head << 32 | range); - } - } - std::cout << "test data size: " << all_cases.size() << std::endl; +// std::vector all_cases; +// for (uint64_t head = 0; head < 16; ++head) { +// for (const auto &range : AllCases::fetch()[head]) { +// all_cases.emplace_back(head << 32 | range); +// } +// } +// std::cout << "test data size: " << all_cases.size() << std::endl; // std::cout << "start benchmark" << std::endl; auto start_time = clock(); @@ -271,14 +271,17 @@ int main() { // } // } - for (uint32_t i = 1000000; i < 1010000; ++i) { - ShortCode::tiny_decode(i); - ShortCode::tiny_encode(all_cases[i]); - } +// for (uint32_t i = 1000000; i < 1010000; ++i) { +// ShortCode::tiny_decode(i); +// ShortCode::tiny_encode(all_cases[i]); +// } // printf("%09lX\n", ShortCode::tiny_decode(14323231)); // std::cout << ShortCode::tiny_encode(0x6EC0F8800) << std::endl; + std::cout << ShortCode(14323231).to_string() << std::endl; + std::cout << ShortCode::from_string("EP4HZ").unwrap() << std::endl; + // printf("%09lX\n", ShortCode::fast_decode(14323231)); // std::cout << ShortCode::fast_encode(0x6EC0F8800) << std::endl; diff --git a/src/short_code/CMakeLists.txt b/src/short_code/CMakeLists.txt index a6fc511..eb192d7 100644 --- a/src/short_code/CMakeLists.txt +++ b/src/short_code/CMakeLists.txt @@ -2,5 +2,5 @@ cmake_minimum_required(VERSION 3.0) include_directories(offset) -add_library(short_code convert.cc short_code.cc data_loader.cc) +add_library(short_code convert.cc serialize.cc short_code.cc) target_link_libraries(short_code utils all_cases) diff --git a/src/short_code/convert.cc b/src/short_code/convert.cc index ef44802..6e6a004 100644 --- a/src/short_code/convert.cc +++ b/src/short_code/convert.cc @@ -6,21 +6,25 @@ #include "basic_ranges_offset.h" #include "range_prefix_offset.h" -CommonCode ShortCode::to_common_code() const { // convert to common code - if (ShortCode::check_mode() == ShortCode::NORMAL) { - return CommonCode::unsafe_create(tiny_decode(code)); // normal mode - } - return CommonCode::unsafe_create(fast_decode(code)); // fast mode +ShortCode ShortCode::from_common_code(const CommonCode &common_code) { + return ShortCode(common_code); // convert from common code } ShortCode::ShortCode(const CommonCode &common_code) { // convert from common code - if (ShortCode::check_mode() == ShortCode::NORMAL) { + if (ShortCode::mode() == ShortCode::NORMAL) { code = tiny_encode(common_code.unwrap()); // normal mode } else { code = fast_encode(common_code.unwrap()); // fast mode } } +CommonCode ShortCode::to_common_code() const { // convert to common code + if (ShortCode::mode() == ShortCode::NORMAL) { + return CommonCode::unsafe_create(tiny_decode(code)); // normal mode + } + return CommonCode::unsafe_create(fast_decode(code)); // fast mode +} + /// NOTE: ensure that input common code is valid !!! uint32_t ShortCode::fast_encode(uint64_t common_code) { // common code --> short code auto head = common_code >> 32; // head index diff --git a/src/short_code/data_loader.cc b/src/short_code/data_loader.cc deleted file mode 100644 index 382f54e..0000000 --- a/src/short_code/data_loader.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include "all_cases.h" -#include "short_code.h" -#include "basic_ranges.h" -//#include "short_code_mark.h" - -//std::mutex ShortCode::map_building; -bool ShortCode::fast_mode_available = false; -bool ShortCode::normal_mode_available = false; - -//std::vector ShortCode::all_cases_list; -//std::unordered_map ShortCode::all_cases_dict; - -void ShortCode::speed_up(ShortCode::Mode mode) { - if (fast_mode_available) { - return; // fast mode already available - } - if (mode == ShortCode::FAST) { // build fast mode data -// build_mappings(); - - // TODO: confirm AllCases data available - AllCases::build(); - - } else if (mode == ShortCode::NORMAL && !normal_mode_available) { // build normal mode data - BasicRanges::build(); // blocking function - normal_mode_available = true; - } -} - -ShortCode::Mode ShortCode::check_mode() { // ensure speed up enabled and return current mode - if (fast_mode_available) { - return ShortCode::FAST; // fast mode already enabled - } - if (normal_mode_available) { - return ShortCode::NORMAL; // normal mode already enabled - } - speed_up(ShortCode::Mode::NORMAL); // without initialized -> enter normal mode - return ShortCode::Mode::NORMAL; // use normal mode -} - -/// ensure that fast_mode_available == false -//void ShortCode::build_mappings() { // build fast search mappings -// if (map_building.try_lock()) { // lock success -> start building -// for (int head = 0; head < 16; ++head) { -// uint64_t prefix = (uint64_t)head << 32; -// for (const auto &range : AllCases::fetch()[head]) { // blocking function -// all_cases_list.emplace_back(prefix | range); // short_code -> common_code -// } -// } -// for (int index = 0; index < all_cases_list.size(); ++index) { -// all_cases_dict[all_cases_list[index]] = index; // common_code -> short_code -// } -// fast_mode_available = true; // set available flag -// } else { // another thread building -// map_building.lock(); // blocking waiting -// } -// map_building.unlock(); -//} - -//uint32_t ShortCode::fast_encode_legacy(uint64_t common_code) { -// return all_cases_dict[common_code]; -//} - -#include - -#include "basic_ranges_offset.h" -#include "range_prefix_offset.h" - -#include -#include "common.h" - diff --git a/src/short_code/serialize.cc b/src/short_code/serialize.cc new file mode 100644 index 0000000..07b2bde --- /dev/null +++ b/src/short_code/serialize.cc @@ -0,0 +1,44 @@ +#include +#include "short_code.h" +#include "serialize_chars.h" + +ShortCode ShortCode::from_string(const std::string &short_code) { + return ShortCode(short_code); // convert from string +} + +std::string ShortCode::to_string() const { // encode as 5-bits string + uint32_t short_code = code; + std::string result(5, '\0'); // short code length 5 + for (int n = 0; n < 5; ++n) { + uint8_t bit = short_code % 32; + short_code = (short_code - bit) / 32; + result[4 - n] = SHORT_CODE_TABLE[bit]; + } + return result; +} + +ShortCode::ShortCode(const std::string &short_code) { // 5-bits string decode + if (short_code.length() != 5) { // check string length + throw std::invalid_argument("short code format error"); + } + + uint64_t result = 0; + for (auto bit : short_code) { + result *= 32; + if (bit >= 'a' && bit <= 'z') { + bit -= 32; // convert to uppercase + } + if (bit >= '1' && bit <= 'Z') { + result += (bit = SHORT_CODE_TABLE_REV[bit - 49]); // table convert + if (bit != -1) { + continue; // pass check + } + } + throw std::invalid_argument("short code format error"); // unknown characters + } + + if (!ShortCode::check(result)) { // check converted short code + throw std::invalid_argument("invalid short code"); + } + code = result; +} diff --git a/src/short_code/short_code_chars.h b/src/short_code/serialize_chars.h similarity index 84% rename from src/short_code/short_code_chars.h rename to src/short_code/serialize_chars.h index 6d1def7..9411dac 100644 --- a/src/short_code/short_code_chars.h +++ b/src/short_code/serialize_chars.h @@ -1,6 +1,8 @@ #pragma once -const char SHORT_CODE_TABLE[32] = { +#include + +const int8_t SHORT_CODE_TABLE[32] = { '1', '2', '3', '4', '5', '6', '7', '8', '9', // skip `0` 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // skip `I` 'J', 'K', // skip `L` @@ -9,7 +11,7 @@ const char SHORT_CODE_TABLE[32] = { }; /// `1`(49) ~ `Z`(90) -const char SHORT_CODE_TABLE_REV[42] = { +const int8_t SHORT_CODE_TABLE_REV[42] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, // `1`(49) ~ `9`(57) -1, -1, -1, -1, -1, -1, -1, // `:`(58) ~ `@`(64) 9, 10, 11, 12, 13, 14, 15, 16, -1, 17, // `A`(65) ~ `J`(74) diff --git a/src/short_code/short_code.cc b/src/short_code/short_code.cc index 43aad3e..9b5946c 100644 --- a/src/short_code/short_code.cc +++ b/src/short_code/short_code.cc @@ -1,11 +1,17 @@ -#include +#include "all_cases.h" #include "short_code.h" -#include "short_code_chars.h" + +bool ShortCode::fast_mode_available = false; +bool ShortCode::normal_mode_available = false; uint32_t ShortCode::unwrap() const { // get raw uint32_t code return code; } +ShortCode ShortCode::create(uint32_t short_code) { + return ShortCode(short_code); +} + bool ShortCode::check(uint32_t short_code) { return short_code < ShortCode::SHORT_CODE_LIMIT; // 0 ~ (SHORT_CODE_LIMIT - 1) } @@ -17,39 +23,34 @@ ShortCode::ShortCode(uint32_t short_code) { code = short_code; } -ShortCode::ShortCode(const std::string &short_code_str) { // 5-bits string decode - if (short_code_str.length() != 5) { // check string length - throw std::invalid_argument("short code format error"); - } - uint64_t short_code = 0; - for (auto bit : short_code_str) { - short_code *= 32; - if (bit >= 'a' && bit <= 'z') { - bit -= 32; // convert to uppercase - } - if (bit >= '1' && bit <= 'Z') { - short_code += (bit = SHORT_CODE_TABLE_REV[bit - 49]); // table convert - if (bit != -1) { - continue; // pass check - } - } - throw std::invalid_argument("short code format error"); // unknown characters + +void ShortCode::speed_up(ShortCode::Mode mode) { + if (fast_mode_available) { + return; // fast mode already available } + if (mode == ShortCode::FAST) { // build fast mode data +// build_mappings(); - if (!ShortCode::check(short_code)) { // check converted short code - throw std::invalid_argument("invalid short code"); + // TODO: confirm AllCases data available + AllCases::build(); + + } else if (mode == ShortCode::NORMAL && !normal_mode_available) { // build normal mode data + BasicRanges::build(); // blocking function + normal_mode_available = true; } - code = short_code; } -std::string ShortCode::to_string() const { // encode as 5-bits string - uint32_t short_code = code; - std::string result(5, '\0'); // short code length 5 - for (int n = 0; n < 5; ++n) { - uint8_t bit = short_code % 32; - short_code = (short_code - bit) / 32; - result[4 - n] = SHORT_CODE_TABLE[bit]; +ShortCode::Mode ShortCode::mode() { // ensure speed up enabled and return current mode + if (fast_mode_available) { + return ShortCode::FAST; // fast mode already enabled + } + if (normal_mode_available) { + return ShortCode::NORMAL; // normal mode already enabled } - return result; + speed_up(ShortCode::Mode::NORMAL); // without initialized -> enter normal mode + return ShortCode::Mode::NORMAL; // use normal mode } + + + diff --git a/src/short_code/short_code.h b/src/short_code/short_code.h index 60683fb..453176d 100644 --- a/src/short_code/short_code.h +++ b/src/short_code/short_code.h @@ -1,19 +1,13 @@ #pragma once -#include -#include #include -#include #include "common_code.h" class CommonCode; class ShortCode { public: - enum Mode { - NORMAL, FAST - }; - static enum Mode check_mode(); + enum Mode {NORMAL, FAST}; static void speed_up(enum Mode mode); uint32_t unwrap() const; @@ -24,49 +18,27 @@ public: // TODO: std::cout << ShortCode(...) << std::endl; explicit ShortCode(uint32_t short_code); + explicit ShortCode(const std::string &short_code); explicit ShortCode(const CommonCode &common_code); - explicit ShortCode(const std::string &short_code_str); - ShortCode(uint32_t short_code, enum Mode mode) : ShortCode(short_code) { - speed_up(mode); - } - ShortCode(const CommonCode &common_code, enum Mode mode) : ShortCode(common_code) { - speed_up(mode); - } - ShortCode(const std::string &short_code_str, enum Mode mode) : ShortCode(short_code_str) { - speed_up(mode); - } - // TODO: mark as private after inner test - static uint64_t fast_decode(uint32_t short_code); - static uint32_t fast_encode(uint64_t common_code); - - static uint64_t tiny_decode(uint32_t short_code); - static uint32_t tiny_encode(uint64_t common_code); - -// static uint64_t tiny_decode_10b(uint32_t short_code); -// static uint32_t tiny_encode_10b(uint64_t common_code); - - // TODO: ShortCode::create() / ShortCode::from_str(...) / ShortCode::from_common_code(...) + static ShortCode create(uint32_t short_code); + static ShortCode from_string(const std::string &short_code); + static ShortCode from_common_code(const CommonCode &common_code); -// static uint32_t fast_encode_legacy(uint64_t common_code); + ShortCode(uint32_t short_code, enum Mode mode) : ShortCode(short_code) { speed_up(mode); } + ShortCode(const std::string &short_code, enum Mode mode) : ShortCode(short_code) { speed_up(mode); } + ShortCode(const CommonCode &common_code, enum Mode mode) : ShortCode(common_code) { speed_up(mode); } private: uint32_t code; - -// static std::mutex map_building; static bool fast_mode_available; static bool normal_mode_available; - static const uint32_t SHORT_CODE_LIMIT = 29334498; - /// NOTE: using binary search instead of global vector and unordered_map - /// for some test, the new function only using < 170MB memory, while the legacy using > 1.5GB - /// but the legacy one is more easy and a little fast. - /// BTW, the new one init less than 1.2s, legacy one need about 15s -// static std::vector all_cases_list; // short_code -> common_code -// static std::unordered_map all_cases_dict; // common_code -> short_code + static enum Mode mode(); -// static void build_mappings(); -// static uint64_t tiny_decode(uint32_t short_code); -// static uint32_t tiny_encode(uint64_t common_code); + static uint64_t fast_decode(uint32_t short_code); + static uint32_t fast_encode(uint64_t common_code); + static uint64_t tiny_decode(uint32_t short_code); + static uint32_t tiny_encode(uint64_t common_code); };