From 6f98f15932996702970c48b6f81dd5d70de7a16c Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sat, 23 Mar 2024 00:05:43 +0800 Subject: [PATCH] feat: md5 wrapper implement --- CMakeLists.txt | 9 ++-- benchmark.cc | 10 +---- src/{md5_core.cc => core.cc} | 5 +-- src/md5.h | 16 ++++--- src/md5_math.h | 2 +- src/md5_wrapper.cc | 64 --------------------------- src/wrapper.cc | 85 ++++++++++++++++++++++++++++++++++++ test/md5_update.cc | 60 +++++++++---------------- 8 files changed, 124 insertions(+), 127 deletions(-) rename src/{md5_core.cc => core.cc} (95%) delete mode 100644 src/md5_wrapper.cc create mode 100644 src/wrapper.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 898b9ea..b500302 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,14 +1,14 @@ -cmake_minimum_required(VERSION 3.5) +cmake_minimum_required(VERSION 3.12) project(md5sum LANGUAGES CXX) option(MD5_ENABLE_TESTING "Enable testing of the md5sum library." ON) option(MD5_ENABLE_BENCHMARK "Enable benchmark of the md5sum library." ON) set(CMAKE_CXX_STANDARD 20) +add_compile_options(-Wall -Wextra) -add_compile_options(-fno-rtti -fno-exceptions -Wall -Wextra) - -add_library(md5sum STATIC src/md5_core.cc src/md5_wrapper.cc) +add_library(md5sum STATIC src/core.cc src/wrapper.cc) +target_compile_options(md5sum PRIVATE -fno-rtti -fno-exceptions) target_include_directories(md5sum INTERFACE src/) include(third_party/ThirdParty.cmake) @@ -23,4 +23,5 @@ endif() if (MD5_ENABLE_BENCHMARK) add_executable(md5_benchmark benchmark.cc) target_link_libraries(md5_benchmark PRIVATE md5sum::md5sum benchmark::benchmark_main) + target_compile_options(md5_benchmark PRIVATE -fno-rtti -fno-exceptions) endif() diff --git a/benchmark.cc b/benchmark.cc index 463a121..790e362 100644 --- a/benchmark.cc +++ b/benchmark.cc @@ -10,22 +10,16 @@ std::string test_data() { return {data, data + 64}; } -static void BM_MD5_NEXT(benchmark::State &state) { +static void MD5_Update(benchmark::State &state) { const auto data = test_data(); -// md5::MD5::md5_ctx c; -// md5::MD5::md5_reset(&c); - md5::MD5 kk; for (auto _ : state) { - kk.Update(data.c_str(), 64); - -// md5::MD5::md5_update(&c, data.c_str(), 64); } } -BENCHMARK(BM_MD5_NEXT); +BENCHMARK(MD5_Update); BENCHMARK_MAIN(); diff --git a/src/md5_core.cc b/src/core.cc similarity index 95% rename from src/md5_core.cc rename to src/core.cc index 9671eb9..fccbf09 100644 --- a/src/md5_core.cc +++ b/src/core.cc @@ -35,13 +35,12 @@ namespace md5 { -static const unsigned char Padding[64] {0x80}; +static const unsigned char Padding[64] {0x80, /* 0x00, ... */}; consteval int K(int index) { // index -> [0, 64) - auto i = index >> 4; const int step[] = {1, 5, 3, 7}; const int begin[] = {0, 1, 5, 0}; - return (begin[i] + step[i] * index) & 0b1111; + return (begin[index >> 4] + step[index >> 4] * index) & 0b1111; } consteval int S(int index) { // index -> [0, 64) diff --git a/src/md5.h b/src/md5.h index 3c1de77..6c1ae2e 100644 --- a/src/md5.h +++ b/src/md5.h @@ -3,27 +3,29 @@ #include #include +static_assert(sizeof(uintptr_t) == 8, + "Project only works on 64-bits architecture."); + static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Project only works on little-endian architecture."); -// TODO: static_assert for 64-bit arch - namespace md5 { class MD5 { public: MD5() = default; + MD5& Final(); MD5& Reset(); - // TODO: using std::string_view + + MD5& Update(const std::string_view &data); MD5& Update(const void *buffer, uint64_t len); - MD5& Final(); - [[nodiscard]] std::string Digest(); - [[nodiscard]] std::string String(); + [[nodiscard]] std::string Digest() const; public: static std::string Hash(const std::string_view &data); + static std::string Hash(const void *data, uint64_t len); private: static constexpr uint32_t MD5_A = 0x67452301; @@ -42,7 +44,7 @@ private: private: md5_ctx ctx_; char buffer_[64] {}; - char buffer_size_ = 0; + uint64_t buffer_size_ = 0; private: /// Update md5 ctx with specified data, note that `len` is a multiple of 64. diff --git a/src/md5_math.h b/src/md5_math.h index dcc26d9..7506ad8 100644 --- a/src/md5_math.h +++ b/src/md5_math.h @@ -44,7 +44,7 @@ consteval double sin(double x) { if (std::abs(x) > PI / 2) { x = ((x > 0) ? 1 : -1) * PI - x; // -PI / 2 < x < PI / 2 } - return sin_core(x); + return sin_core(x); // closer to 0 for better accuracy } } // namespace md5::math diff --git a/src/md5_wrapper.cc b/src/md5_wrapper.cc deleted file mode 100644 index 41e664a..0000000 --- a/src/md5_wrapper.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include "md5.h" - -namespace md5 { - -MD5& MD5::Reset() { - ctx_.A = MD5_A; - ctx_.B = MD5_B; - ctx_.C = MD5_C; - ctx_.D = MD5_D; - - ctx_.size = 0; - buffer_size_ = 0; - return *this; -} - -MD5& MD5::Update(const void *buffer, uint64_t len) { - // TODO: handle buffer_ - - if (buffer_size_ == 0) { - // TODO: len % 64 == 0 - md5_update(&ctx_, buffer, len); - } - - // TODO: save extra data into buffer_ - - return *this; -} - -MD5& MD5::Final() { - // TODO: call md5_final with buffer_ - return *this; -} - -std::string MD5::Digest() { - // TODO: return md5 digest in std::string (length = 16) - return ""; -} - -std::string MD5::String() { - // TODO: return md5 string (length = 32) - return ""; -} - -std::string MD5::Hash(const std::string_view &data) { - - md5_ctx ctx; - md5_final(&ctx, data.data(), data.size()); - -// std::cout << std::hex << __builtin_bswap32(ctx.A); -// std::cout << std::hex << __builtin_bswap32(ctx.B); -// std::cout << std::hex << __builtin_bswap32(ctx.C); -// std::cout << std::hex << __builtin_bswap32(ctx.D) << std::endl; - - // TODO: perf convert speed - char tmp[33]; - sprintf(tmp, "%08x%08x%08x%08x", - __builtin_bswap32(ctx.A), - __builtin_bswap32(ctx.B), - __builtin_bswap32(ctx.C), - __builtin_bswap32(ctx.D)); - return {tmp}; -} - -} // namespace md5 diff --git a/src/wrapper.cc b/src/wrapper.cc new file mode 100644 index 0000000..d3e1091 --- /dev/null +++ b/src/wrapper.cc @@ -0,0 +1,85 @@ +#include + +#include "md5.h" + +namespace md5 { + +// TODO: inline impl +MD5& MD5::Reset() { + ctx_.A = MD5_A; + ctx_.B = MD5_B; + ctx_.C = MD5_C; + ctx_.D = MD5_D; + + ctx_.size = 0; + buffer_size_ = 0; + return *this; +} + +MD5& MD5::Update(const void *data, uint64_t len) { + if (buffer_size_ != 0) { + if (buffer_size_ + len < 64) { // buffer not filled + std::memcpy(buffer_ + buffer_size_, data, len); + buffer_size_ += len; + return *this; // save into buffer and return + } + + auto size = 64 - buffer_size_; + std::memcpy(buffer_ + buffer_size_, data, size); + md5_update(&ctx_, buffer_, 64); // fill and update with buffer + data = reinterpret_cast(data) + size; + buffer_size_ = 0; + len -= size; + } // buffer is empty for now + + auto size = len & ~(uint64_t)0b111111; + md5_update(&ctx_, data, size); + data = reinterpret_cast(data) + size; + len &= 0b111111; // len -> [0, 64) + + if (len != 0) { + std::memcpy(buffer_, data, len); // save remain data into buffer + buffer_size_ = len; + } + return *this; +} + +MD5& MD5::Update(const std::string_view &data) { + return Update(data.data(), data.size()); +} + +MD5& MD5::Final() { + md5_final(&ctx_, buffer_, buffer_size_); + return *this; +} + +std::string MD5::Digest() const { + // TODO: perf convert speed + char tmp[33]; + sprintf(tmp, "%08x%08x%08x%08x", + __builtin_bswap32(ctx_.A), + __builtin_bswap32(ctx_.B), + __builtin_bswap32(ctx_.C), + __builtin_bswap32(ctx_.D)); + return {tmp}; +} + +std::string MD5::Hash(const void *data, uint64_t len) { + md5_ctx ctx; + md5_final(&ctx, data, len); + + // TODO: perf convert speed + char tmp[33]; + sprintf(tmp, "%08x%08x%08x%08x", + __builtin_bswap32(ctx.A), + __builtin_bswap32(ctx.B), + __builtin_bswap32(ctx.C), + __builtin_bswap32(ctx.D)); + return {tmp}; +} + +std::string MD5::Hash(const std::string_view &data) { + return Hash(data.data(), data.size()); +} + +} // namespace md5 diff --git a/test/md5_update.cc b/test/md5_update.cc index f2649c0..c1524b4 100644 --- a/test/md5_update.cc +++ b/test/md5_update.cc @@ -4,46 +4,6 @@ using namespace md5; -std::string test_data() { - char data[64]; - for (char i = 0; i < 64; ++i) { - data[i] = i; - } - return {data, data + 64}; -} - -//void dump_ctx(const md5::MD5::md5_ctx *c) { -// std::cout << std::hex << c->A << std::endl; -// std::cout << std::hex << c->B << std::endl; -// std::cout << std::hex << c->C << std::endl; -// std::cout << std::hex << c->D << std::endl; -// std::cout << std::dec << c->size << std::endl; -//} - -TEST(md5sum, main) { - auto data = test_data() + test_data() + test_data() + test_data(); - -// md5::MD5::md5_ctx c; -// dump_ctx(&c); - - // md5::md5_update(&c, data.c_str(), data.size()); - // md5::md5_update(&c, data.c_str(), data.size()); - // dump_ctx(&c); - - // md5::md5_reset(&c); - // md5::md5_update(&c, data.c_str(), data.size()); - // md5::md5_update(&c, data.c_str(), data.size()); - // dump_ctx(&c); - -// md5::MD5::md5_final(&c, data.c_str(), 0); -// dump_ctx(&c); -// -// md5::MD5::md5_reset(&c); -// md5::MD5::md5_final(&c, data.c_str(), data.size()); -// dump_ctx(&c); - -} - TEST(md5sum, hash) { auto test_data = [](uint8_t size) -> std::string { std::string data {}; @@ -326,3 +286,23 @@ TEST(md5sum, hash) { EXPECT_EQ(MD5::Hash(test_data(0xfe)), "7bdac450b9343317aa89895d4dda181e"); EXPECT_EQ(MD5::Hash(test_data(0xff)), "11b7aaa64c413d2f0fccf893881c46a2"); } + + +TEST(md5sum, update) { + + std::string test_data {}; + test_data.resize(256 * 256); + + for (int i = 0; i < test_data.size(); ++i) { + test_data[i] = i & 0xff; + } + + for (int round = 1; round <= 256; ++round) { + MD5 md5_obj; + for (int i = 0; i < 256; ++i) { + md5_obj.Update(test_data.data() + i * round, round); + } + EXPECT_EQ(md5_obj.Final().Digest(), MD5::Hash(test_data.data(), 256 * round)); + } + +}