From 7d3df6bd91bd4b4c4dc73e88898b11957cce94eb Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sat, 23 Mar 2024 22:49:41 +0800 Subject: [PATCH] update: fixes and improvements --- benchmark.cc | 9 +++++++ src/core.cc | 37 ++++++++++++++-------------- src/md5.h | 27 +++++++++++++++------ src/md5.inc | 34 ++++++++++++++++++++++++++ src/wrapper.cc | 66 ++++++++++++-------------------------------------- 5 files changed, 95 insertions(+), 78 deletions(-) create mode 100644 src/md5.inc diff --git a/benchmark.cc b/benchmark.cc index 790e362..e2d3a6d 100644 --- a/benchmark.cc +++ b/benchmark.cc @@ -20,6 +20,15 @@ static void MD5_Update(benchmark::State &state) { } } +static void MD5_Digest(benchmark::State &state) { + md5::MD5 kk; + for (auto _ : state) { + auto pp = kk.Digest(); + } +} + BENCHMARK(MD5_Update); +BENCHMARK(MD5_Digest); + BENCHMARK_MAIN(); diff --git a/src/core.cc b/src/core.cc index fccbf09..f22ca04 100644 --- a/src/core.cc +++ b/src/core.cc @@ -58,13 +58,13 @@ consteval uint32_t T(int index) { // index -> [0, 64) return static_cast(std::abs(val) * 0x100000000); } -void MD5::md5_update(md5_ctx *ctx, const void *data, uint64_t len) { - auto *block = reinterpret_cast(data); - auto *limit = block + (len >> 2); - auto A = ctx->A; - auto B = ctx->B; - auto C = ctx->C; - auto D = ctx->D; +const void* MD5::UpdateImpl(const void *data, uint64_t len) { + auto *block = static_cast(data); + auto *limit = block + ((len &= ~0b111111ULL) >> 2); + auto A = ctx_.A; + auto B = ctx_.B; + auto C = ctx_.C; + auto D = ctx_.D; while (block < limit) { auto A_ = A; @@ -82,33 +82,32 @@ void MD5::md5_update(md5_ctx *ctx, const void *data, uint64_t len) { block += 16; // move to next block } - ctx->A = A; - ctx->B = B; - ctx->C = C; - ctx->D = D; - ctx->size += len; + ctx_.A = A; + ctx_.B = B; + ctx_.C = C; + ctx_.D = D; + ctx_.size += len; + return static_cast(limit); } -void MD5::md5_final(md5_ctx *ctx, const void *data, uint64_t len) { +void MD5::FinalImpl(const void *data, uint64_t 
len) { if (len >= 120) { // len -> [64 + 56, INF) - auto size = len & ~(uint64_t)0b111111; - md5_update(ctx, data, size); - data = reinterpret_cast(data) + size; + data = UpdateImpl(data, len); len &= 0b111111; // len -> [0, 64) } unsigned char buffer[128]; // 2 blocks std::memcpy(buffer, data, len); - uint64_t total = (ctx->size + len) << 3; // total number in bit + uint64_t total = (ctx_.size + len) << 3; // total number in bit if (len < 56) { // len -> [0, 56) std::memcpy(buffer + len, Padding, 56 - len); std::memcpy(buffer + 56, &total, 8); - md5_update(ctx, buffer, 64); // update 1 block + UpdateImpl(buffer, 64); // update 1 block } else { // len -> [56, 64 + 56) std::memcpy(buffer + len, Padding, 120 - len); std::memcpy(buffer + 120, &total, 8); - md5_update(ctx, buffer, 128); // update 2 blocks + UpdateImpl(buffer, 128); // update 2 blocks } } diff --git a/src/md5.h b/src/md5.h index 6c1ae2e..3f89d09 100644 --- a/src/md5.h +++ b/src/md5.h @@ -15,16 +15,26 @@ class MD5 { public: MD5() = default; - MD5& Final(); + /// Reset for next round of hashing. MD5& Reset(); + /// Update md5 hash with specified data. MD5& Update(const std::string_view &data); - MD5& Update(const void *buffer, uint64_t len); + /// Update md5 hash with specified data. + MD5& Update(const void *data, uint64_t len); + + /// Stop streaming updates and calculate result. + MD5& Final(); + + /// Get the string result of md5. [[nodiscard]] std::string Digest() const; public: + /// Calculate the md5 hash value of the specified data. static std::string Hash(const std::string_view &data); + + /// Calculate the md5 hash value of the specified data. static std::string Hash(const void *data, uint64_t len); private: @@ -44,14 +54,15 @@ private: private: md5_ctx ctx_; char buffer_[64] {}; - uint64_t buffer_size_ = 0; + uint64_t buffer_size_ = 0; // size < 64 -private: - /// Update md5 ctx with specified data, note that `len` is a multiple of 64. 
- static void md5_update(md5_ctx *ctx, const void *buffer, uint64_t len); + /// Update md5 ctx with specified data, and return a pointer to the unprocessed data (< 64 bytes). + const void* UpdateImpl(const void *data, uint64_t len); - /// Update and end the md5 hash with the specified data, the value of `len` has no limit. - static void md5_final(md5_ctx *ctx, const void *buffer, uint64_t len); + /// Update and finalize the md5 hash with the specified data. + void FinalImpl(const void *data, uint64_t len); }; } // namespace md5 + +#include "md5.inc" diff --git a/src/md5.inc b/src/md5.inc new file mode 100644 index 0000000..e2641ae --- /dev/null +++ b/src/md5.inc @@ -0,0 +1,34 @@ +#pragma once + +namespace md5 { + +inline MD5& MD5::Reset() { + ctx_.A = MD5_A; + ctx_.B = MD5_B; + ctx_.C = MD5_C; + ctx_.D = MD5_D; + ctx_.size = 0; + buffer_size_ = 0; + return *this; +} + +inline MD5& MD5::Final() { + FinalImpl(buffer_, buffer_size_); + return *this; +} + +inline MD5& MD5::Update(const std::string_view &data) { + return Update(data.data(), data.size()); +} + +inline std::string MD5::Hash(const std::string_view &data) { + return Hash(data.data(), data.size()); +} + +inline std::string MD5::Hash(const void *data, uint64_t len) { + MD5 md5; + md5.FinalImpl(data, len); + return md5.Digest(); +} + +} // namespace md5 diff --git a/src/wrapper.cc b/src/wrapper.cc index d3e1091..bad6c30 100644 --- a/src/wrapper.cc +++ b/src/wrapper.cc @@ -4,18 +4,6 @@ namespace md5 { -// TODO: inline impl -MD5& MD5::Reset() { - ctx_.A = MD5_A; - ctx_.B = MD5_B; - ctx_.C = MD5_C; - ctx_.D = MD5_D; - - ctx_.size = 0; - buffer_size_ = 0; - return *this; -} - MD5& MD5::Update(const void *data, uint64_t len) { if (buffer_size_ != 0) { if (buffer_size_ + len < 64) { // buffer not filled @@ -26,15 +14,13 @@ MD5& MD5::Update(const void *data, uint64_t len) { auto size = 64 - buffer_size_; std::memcpy(buffer_ + buffer_size_, data, size); - md5_update(&ctx_, buffer_, 64); // fill and update with buffer -
data = reinterpret_cast(data) + size; + UpdateImpl(buffer_, 64); // fill and update with buffer + data = static_cast(data) + size; buffer_size_ = 0; len -= size; } // buffer is empty for now - auto size = len & ~(uint64_t)0b111111; - md5_update(&ctx_, data, size); - data = reinterpret_cast(data) + size; + data = UpdateImpl(data, len); len &= 0b111111; // len -> [0, 64) if (len != 0) { @@ -44,42 +30,20 @@ MD5& MD5::Update(const void *data, uint64_t len) { return *this; } -MD5& MD5::Update(const std::string_view &data) { - return Update(data.data(), data.size()); -} - -MD5& MD5::Final() { - md5_final(&ctx_, buffer_, buffer_size_); - return *this; -} +static constexpr char HexTable[] = { + '0','1','2','3','4','5','6','7', + '8','9','a','b','c','d','e','f', +}; std::string MD5::Digest() const { - // TODO: perf convert speed - char tmp[33]; - sprintf(tmp, "%08x%08x%08x%08x", - __builtin_bswap32(ctx_.A), - __builtin_bswap32(ctx_.B), - __builtin_bswap32(ctx_.C), - __builtin_bswap32(ctx_.D)); - return {tmp}; -} - -std::string MD5::Hash(const void *data, uint64_t len) { - md5_ctx ctx; - md5_final(&ctx, data, len); - - // TODO: perf convert speed - char tmp[33]; - sprintf(tmp, "%08x%08x%08x%08x", - __builtin_bswap32(ctx.A), - __builtin_bswap32(ctx.B), - __builtin_bswap32(ctx.C), - __builtin_bswap32(ctx.D)); - return {tmp}; -} - -std::string MD5::Hash(const std::string_view &data) { - return Hash(data.data(), data.size()); + std::string result {}; + result.resize(32); + auto *src = reinterpret_cast(&ctx_); + for (int i = 0; i < 32; ++src) { + result[i++] = HexTable[*src >> 4]; + result[i++] = HexTable[*src & 0b1111]; + } + return result; } } // namespace md5