Browse Source

feat: add loop-unroll support

legacy
Dnomd343 1 month ago
parent
commit
57c10ee5f2
  1. 2
      src/core/ranges/internal/derive.cc
  2. 5
      src/core/ranges/internal/ranges.cc
  3. 2
      src/core/ranges/internal/ranges_union.inl
  4. 10
      src/core/utils/utility.h

2
src/core/ranges/internal/derive.cc

@ -39,7 +39,7 @@ void Ranges::derive(const int head, Ranges &output) const {
/// ( xx xx xx ) xx xx xx ... [reversed range]
/// +1 00 00 00 ... (delta)
const uint32_t delta = 1U << (32 - offset * 2); // distance to next possible range
const auto min_next = delta + range_reverse((*this)[index]) & ~(delta - 1);
const auto min_next = delta + (range_reverse((*this)[index]) & ~(delta - 1));
if (min_next > max_val) {
break; // index has overflowed
}

5
src/core/ranges/internal/ranges.cc

@ -39,7 +39,7 @@ KLSK_INLINE CommonCode RangesUnion::operator[](size_type n) const {
}
n -= ranges(0).size();
#pragma unroll
KLSK_UNROLL(sizeof(Heads) - 2)
for (const uint64_t head : std::to_array({0x1, 0x2, 0x4, 0x5, 0x6, 0x8, 0x9, 0xA, 0xC, 0xD})) {
if (n < ranges(head).size()) {
return CommonCode::unsafe_create(head << 32 | ranges(head)[n]);
@ -48,7 +48,4 @@ KLSK_INLINE CommonCode RangesUnion::operator[](size_type n) const {
}
return CommonCode::unsafe_create((uint64_t)0xE << 32 | ranges(0xE)[n]);
// std::unreachable();
}

2
src/core/ranges/internal/ranges_union.inl

@ -12,7 +12,7 @@ inline const Ranges& RangesUnion::ranges(const size_t head) const {
KLSK_INLINE_H size_t RangesUnion::size() const {
size_type size = 0;
#pragma unroll
KLSK_UNROLL(sizeof(Heads))
for (const auto head : Heads) {
size += ranges(head).size();
}

10
src/core/utils/utility.h

@ -41,7 +41,15 @@
/// Shorthands that append a standard specifier to KLSK_INLINE.
/// NOTE(review): KLSK_INLINE itself is defined outside this excerpt -- confirm
/// its expansion before relying on these.
///   - KLSK_INLINE_H  : KLSK_INLINE followed by `inline`
///   - KLSK_INLINE_CE : KLSK_INLINE followed by `constexpr`
#define KLSK_INLINE_H KLSK_INLINE inline
#define KLSK_INLINE_CE KLSK_INLINE constexpr
// TODO: using `#pragma GCC unroll`
/// Stringify the macro argument as written (no further expansion of `x`);
/// used below to build the string operand required by `_Pragma`.
#define KLSK_STRING(x) #x
/// Loop-unroll hint. Write `KLSK_UNROLL(N)` on the line directly before a loop.
///   - Clang : expands to `#pragma unroll N`
///   - GCC   : expands to `#pragma GCC unroll N`
///   - other : expands to nothing, so the loop is compiled without a hint
/// `N` is substituted into the pragma text before stringification, so an
/// expression such as `sizeof(Heads) - 2` reaches the compiler verbatim.
/// `__clang__` must be tested first because Clang also defines `__GNUC__`.
#if defined(__clang__)
#define KLSK_UNROLL(N) _Pragma(KLSK_STRING(unroll N))
#elif defined(__GNUC__)
#define KLSK_UNROLL(N) _Pragma(KLSK_STRING(GCC unroll N))
#else
#define KLSK_UNROLL(N)
#endif
/// Prevent reordering for both compiler and processor.
/// Expands to a sequentially-consistent `std::atomic_thread_fence` call, so
/// `<atomic>` must be included at the point of use. The macro carries no
/// trailing semicolon; the caller supplies it.
#define KLSK_MEM_BARRIER std::atomic_thread_fence(std::memory_order_seq_cst)

Loading…
Cancel
Save