16 #ifndef HIGHWAY_HWY_BASE_H_
17 #define HIGHWAY_HWY_BASE_H_
// Two-level stringification: HWY_STR(x) first macro-expands x, then
// HWY_STR_IMPL applies `#` to the expanded tokens. Calling HWY_STR_IMPL
// directly would stringify the argument's spelling without expanding it.
34 #define HWY_STR_IMPL(macro) #macro
35 #define HWY_STR(macro) HWY_STR(macro)
// Compiler-abstraction macros spelled with MSVC keywords (__forceinline,
// __declspec, __pragma).
// NOTE(review): the preprocessor conditional that selects this group is not
// visible in this listing.
41 #define HWY_RESTRICT __restrict
42 #define HWY_INLINE __forceinline
43 #define HWY_NOINLINE __declspec(noinline)
45 #define HWY_NORETURN __declspec(noreturn)
// Branch hints are no-ops in this variant: they expand to the bare expression.
46 #define HWY_LIKELY(expr) (expr)
47 #define HWY_UNLIKELY(expr) (expr)
48 #define HWY_PRAGMA(tokens) __pragma(tokens)
49 #define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(warning(tokens))
// Only the `msc` argument is forwarded; `gcc` is discarded.
50 #define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(msc)
// Expands to nothing in this variant.
51 #define HWY_MAYBE_UNUSED
52 #define HWY_HAS_ASSUME_ALIGNED 0
53 #if (_MSC_VER >= 1700)
54 #define HWY_MUST_USE_RESULT _Check_return_
56 #define HWY_MUST_USE_RESULT
// Same compiler-abstraction macros, spelled with GCC-style __attribute__,
// __builtin_expect and _Pragma.
// NOTE(review): the preprocessor conditional that selects this group is not
// visible in this listing.
61 #define HWY_RESTRICT __restrict__
62 #define HWY_INLINE inline __attribute__((always_inline))
63 #define HWY_NOINLINE __attribute__((noinline))
64 #define HWY_FLATTEN __attribute__((flatten))
65 #define HWY_NORETURN __attribute__((noreturn))
// !!(expr) converts the condition to 0/1 before it is compared against the
// expected value passed to __builtin_expect.
66 #define HWY_LIKELY(expr) __builtin_expect(!!(expr), 1)
67 #define HWY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
// _Pragma takes a string literal, hence the stringification of `tokens`.
68 #define HWY_PRAGMA(tokens) _Pragma(#tokens)
69 #define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(GCC diagnostic tokens)
// Only the `gcc` argument is forwarded; `msc` is discarded.
70 #define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(gcc)
73 #define HWY_MAYBE_UNUSED __attribute__((unused))
74 #define HWY_MUST_USE_RESULT __attribute__((warn_unused_result))
82 #if HWY_HAS_ATTRIBUTE(__format__)
83 #define HWY_FORMAT(idx_fmt, idx_arg) \
84 __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
86 #define HWY_FORMAT(idx_fmt, idx_arg)
94 #if HWY_HAS_BUILTIN(__builtin_assume_aligned)
95 #define HWY_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
97 #define HWY_ASSUME_ALIGNED(ptr, align) (ptr)
103 #if HWY_COMPILER_CLANG
104 #define HWY_PUSH_ATTRIBUTES(targets_str) \
105 HWY_PRAGMA(clang attribute push(__attribute__((target(targets_str))), \
106 apply_to = function))
107 #define HWY_POP_ATTRIBUTES HWY_PRAGMA(clang attribute pop)
108 #elif HWY_COMPILER_GCC
109 #define HWY_PUSH_ATTRIBUTES(targets_str) \
110 HWY_PRAGMA(GCC push_options) HWY_PRAGMA(GCC target targets_str)
111 #define HWY_POP_ATTRIBUTES HWY_PRAGMA(GCC pop_options)
113 #define HWY_PUSH_ATTRIBUTES(targets_str)
114 #define HWY_POP_ATTRIBUTES
// Declaration prefix combining `static` with the HWY_INLINE, HWY_FLATTEN and
// HWY_MAYBE_UNUSED wrappers; the exact attributes depend on which
// compiler-specific definitions of those macros are active.
120 #define HWY_API static HWY_INLINE HWY_FLATTEN HWY_MAYBE_UNUSED
// Token pasting with prior expansion: HWY_CONCAT(a, b) macro-expands both
// arguments before HWY_CONCAT_IMPL joins them with `##`. Using
// HWY_CONCAT_IMPL directly pastes the arguments as written.
122 #define HWY_CONCAT_IMPL(a, b) a##b
123 #define HWY_CONCAT(a, b) HWY_CONCAT_IMPL(a, b)
// Minimum / maximum of two expressions. Both arguments are fully
// parenthesized, but the selected one is evaluated twice, so do not pass
// expressions with side effects (e.g. `i++`). When the operands compare
// equal, the second argument `b` is the one returned.
125 #define HWY_MIN(a, b) ((a) < (b) ? (a) : (b))
126 #define HWY_MAX(a, b) ((a) > (b) ? (a) : (b))
// Expands to an acquire-release std::atomic_thread_fence call. The expansion
// has no trailing semicolon, so the use site must supply one. Requires
// <atomic> to be included wherever the macro is used.
132 #define HWY_FENCE std::atomic_thread_fence(std::memory_order_acq_rel)
// Repeats `literal` four times as a comma-separated list, e.g. for
// initializer lists. The argument is not parenthesized, so it is pasted
// exactly as written.
139 #define HWY_REP4(literal) literal, literal, literal, literal
// Forwards to ::hwy::Abort with the current __FILE__ and __LINE__, followed by
// `format` and any additional arguments. `##__VA_ARGS__` is a compiler
// extension that removes the preceding comma when no variadic arguments are
// supplied, so HWY_ABORT("msg") is valid.
141 #define HWY_ABORT(format, ...) \
142 ::hwy::Abort(__FILE__, __LINE__, format, ##__VA_ARGS__)
145 #define HWY_ASSERT(condition) \
147 if (!(condition)) { \
148 HWY_ABORT("Assert %s", #condition); \
152 #if HWY_HAS_FEATURE(memory_sanitizer) || defined(MEMORY_SANITIZER)
153 #define HWY_IS_MSAN 1
155 #define HWY_IS_MSAN 0
158 #if HWY_HAS_FEATURE(address_sanitizer) || defined(ADDRESS_SANITIZER)
159 #define HWY_IS_ASAN 1
161 #define HWY_IS_ASAN 0
164 #if HWY_HAS_FEATURE(thread_sanitizer) || defined(THREAD_SANITIZER)
165 #define HWY_IS_TSAN 1
167 #define HWY_IS_TSAN 0
173 #define HWY_ATTR_NO_MSAN __attribute__((no_sanitize_memory))
175 #define HWY_ATTR_NO_MSAN
179 #if !defined(HWY_IS_DEBUG_BUILD)
182 #if (!defined(__OPTIMIZE__) && !defined(NDEBUG)) || HWY_IS_ASAN || \
183 HWY_IS_MSAN || HWY_IS_TSAN || defined(__clang_analyzer__)
184 #define HWY_IS_DEBUG_BUILD 1
186 #define HWY_IS_DEBUG_BUILD 0
190 #if HWY_IS_DEBUG_BUILD
191 #define HWY_DASSERT(condition) HWY_ASSERT(condition)
193 #define HWY_DASSERT(condition) \
205 #elif HWY_ARCH_RVV && defined(__riscv_vector)
219 #define HWY_ALIGN_MAX alignas(64)
220 #elif HWY_ARCH_RVV && defined(__riscv_vector)
221 #define HWY_ALIGN_MAX alignas(8)
223 #define HWY_ALIGN_MAX alignas(16)
232 #if HWY_ARCH_ARM && (__ARM_FP & 2)
233 #define HWY_NATIVE_FLOAT16 1
235 #define HWY_NATIVE_FLOAT16 0
238 #pragma pack(push, 1)
240 #if HWY_NATIVE_FLOAT16
241 using float16_t = __fp16;
260 #pragma pack(push, 1)
301 template <
bool Condition>
308 template <
bool Condition>
311 template <
typename T,
typename U>
316 template <
typename T>
321 template <
typename T,
typename U>
// SFINAE constraints on total vector size in bytes. Each macro expands to a
// defaulted non-type template parameter `hwy::EnableIf<cond>* = nullptr`,
// where `cond` compares N * sizeof(T) against 4, 8 or 16 bytes
// (32, 64 or 128 bits). Intended for use as the last entry of a template
// parameter list.
//
// `N` is parenthesized so that an expression argument such as `A + B` is
// multiplied as a whole. The entire condition is parenthesized (as
// HWY_IF_GT128 already required) so relational operators can never be parsed
// as the end of the template-argument list.
332 #define HWY_IF_LE128(T, N) hwy::EnableIf<((N) * sizeof(T) <= 16)>* = nullptr
333 #define HWY_IF_LE64(T, N) hwy::EnableIf<((N) * sizeof(T) <= 8)>* = nullptr
334 #define HWY_IF_LE32(T, N) hwy::EnableIf<((N) * sizeof(T) <= 4)>* = nullptr
335 #define HWY_IF_GE32(T, N) hwy::EnableIf<((N) * sizeof(T) >= 4)>* = nullptr
336 #define HWY_IF_GE64(T, N) hwy::EnableIf<((N) * sizeof(T) >= 8)>* = nullptr
337 #define HWY_IF_GE128(T, N) hwy::EnableIf<((N) * sizeof(T) >= 16)>* = nullptr
338 #define HWY_IF_GT128(T, N) hwy::EnableIf<((N) * sizeof(T) > 16)>* = nullptr
// SFINAE constraints on the lane type category. Each macro expands to a
// defaulted template parameter `hwy::EnableIf<cond>* = nullptr`.
//  - HWY_IF_UNSIGNED:  IsSigned<T>() is false.
//  - HWY_IF_SIGNED:    IsSigned<T>() is true and T is not a float type,
//                      i.e. signed integers only.
//  - HWY_IF_FLOAT / HWY_IF_NOT_FLOAT: IsFloat<T>() is true / false.
//
// All predicates are qualified with `hwy::` so the macros expand correctly
// regardless of the namespace in which they are used; previously
// HWY_IF_UNSIGNED and HWY_IF_SIGNED relied on unqualified lookup, unlike the
// two float variants.
340 #define HWY_IF_UNSIGNED(T) hwy::EnableIf<!hwy::IsSigned<T>()>* = nullptr
341 #define HWY_IF_SIGNED(T) \
342 hwy::EnableIf<hwy::IsSigned<T>() && !hwy::IsFloat<T>()>* = nullptr
343 #define HWY_IF_FLOAT(T) hwy::EnableIf<hwy::IsFloat<T>()>* = nullptr
344 #define HWY_IF_NOT_FLOAT(T) hwy::EnableIf<!hwy::IsFloat<T>()>* = nullptr
// SFINAE constraints on the lane size. Each macro expands to a defaulted
// template parameter `hwy::EnableIf<cond>* = nullptr`.
//  - HWY_IF_LANE_SIZE / _NOT_LANE_SIZE / _LANE_SIZE_LT compare sizeof(T)
//    against `bytes` (==, != and < respectively).
//  - HWY_IF_LANES_PER_BLOCK requires that the number of T lanes in
//    min(sizeof(T) * N, 16) bytes equals LANES.
//
// `N` is parenthesized so that an expression argument such as `A + B` is
// multiplied by sizeof(T) as a whole.
346 #define HWY_IF_LANE_SIZE(T, bytes) \
347 hwy::EnableIf<sizeof(T) == (bytes)>* = nullptr
348 #define HWY_IF_NOT_LANE_SIZE(T, bytes) \
349 hwy::EnableIf<sizeof(T) != (bytes)>* = nullptr
350 #define HWY_IF_LANE_SIZE_LT(T, bytes) \
351 hwy::EnableIf<sizeof(T) < (bytes)>* = nullptr
353 #define HWY_IF_LANES_PER_BLOCK(T, N, LANES) \
354 hwy::EnableIf<HWY_MIN(sizeof(T) * (N), 16) / sizeof(T) == (LANES)>* = nullptr
377 template <
typename T>
502 template <
typename T>
504 template <
typename T>
506 template <
typename T>
510 template <
typename T>
512 template <
typename T>
526 template <
typename T>
530 return IsSame<T, float>() || IsSame<T, double>();
533 template <
typename T>
547 template <
typename T>
549 static_assert(!IsFloat<T>(),
"Only for integer types");
551 return static_cast<T
>(IsSigned<T>() ? (
static_cast<TU
>(~0ull) >> 1)
552 :
static_cast<TU
>(~0ull));
554 template <
typename T>
556 static_assert(!IsFloat<T>(),
"Only for integer types");
557 return IsSigned<T>() ? T(-1) - LimitsMax<T>() : T(0);
562 template <
typename T>
564 return LimitsMin<T>();
568 return -3.402823466e+38F;
572 return -1.7976931348623158e+308;
575 template <
typename T>
577 return LimitsMax<T>();
581 return 3.402823466e+38F;
585 return 1.7976931348623158e+308;
589 template <
typename T>
591 static_assert(
sizeof(T) == 0,
"Only instantiate the specializations");
605 template <
typename T>
611 template <
typename T>
617 template <
typename T>
619 return (~(
MakeUnsigned<T>{1} << MantissaBits<T>()) + 1) & ~SignMask<T>();
623 template <
typename T>
630 template <
typename T>
632 static_assert(
sizeof(T) == 0,
"Only instantiate the specializations");
642 return 4503599627370496.0;
646 template <
typename T>
649 return 8 *
sizeof(T) - 1 - MantissaBits<T>();
655 template <
typename T>
663 template <
typename T1,
typename T2>
665 return (a + b - 1) / b;
669 constexpr
inline size_t RoundUpTo(
size_t what,
size_t align) {
670 return DivCeil(what, align) * align;
675 #if HWY_COMPILER_MSVC
677 _BitScanForward(&index, x);
680 return static_cast<size_t>(__builtin_ctz(x));
685 #if HWY_COMPILER_MSVC
688 _BitScanForward64(&index, x);
692 uint32_t lsb =
static_cast<uint32_t
>(x & 0xFFFFFFFF);
695 uint32_t msb =
static_cast<uint32_t
>(x >> 32u);
696 _BitScanForward(&index, msb);
699 _BitScanForward(&index, lsb);
704 return static_cast<size_t>(__builtin_ctzll(x));
710 #if HWY_COMPILER_MSVC
712 _BitScanReverse(&index, x);
715 return static_cast<size_t>(__builtin_clz(x));
720 #if HWY_COMPILER_MSVC
723 _BitScanReverse64(&index, x);
727 const uint32_t msb =
static_cast<uint32_t
>(x >> 32u);
730 const uint32_t lsb =
static_cast<uint32_t
>(x & 0xFFFFFFFF);
731 _BitScanReverse(&index, lsb);
734 _BitScanReverse(&index, msb);
739 return static_cast<size_t>(__builtin_clzll(x));
744 #if HWY_COMPILER_CLANG || HWY_COMPILER_GCC
745 return static_cast<size_t>(__builtin_popcountll(x));
750 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64 && defined(__AVX__)
751 return _mm_popcnt_u64(x);
752 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_32 && defined(__AVX__)
753 return _mm_popcnt_u32(
static_cast<uint32_t
>(x & 0xFFFFFFFFu)) +
754 _mm_popcnt_u32(
static_cast<uint32_t
>(x >> 32));
756 x -= ((x >> 1) & 0x5555555555555555ULL);
757 x = (((x >> 2) & 0x3333333333333333ULL) + (x & 0x3333333333333333ULL));
758 x = (((x >> 4) + x) & 0x0F0F0F0F0F0F0F0FULL);
762 return static_cast<size_t>(x & 0x7Fu);
769 template <
typename TI>
773 :
static_cast<size_t>(
FloorLog2(
static_cast<TI
>(x >> 1)) + 1);
776 template <
typename TI>
780 :
static_cast<size_t>(
FloorLog2(
static_cast<TI
>(x - 1)) + 1);
783 #if HWY_COMPILER_MSVC && HWY_ARCH_X86_64
784 #pragma intrinsic(_umul128)
789 #if defined(__SIZEOF_INT128__)
790 __uint128_t product = (__uint128_t)a * (__uint128_t)b;
791 *upper = (uint64_t)(product >> 64);
792 return (uint64_t)(product & 0xFFFFFFFFFFFFFFFFULL);
793 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
794 return _umul128(a, b, upper);
796 constexpr uint64_t kLo32 = 0xFFFFFFFFU;
797 const uint64_t lo_lo = (a & kLo32) * (b & kLo32);
798 const uint64_t hi_lo = (a >> 32) * (b & kLo32);
799 const uint64_t lo_hi = (a & kLo32) * (b >> 32);
800 const uint64_t hi_hi = (a >> 32) * (b >> 32);
801 const uint64_t t = (lo_lo >> 32) + (hi_lo & kLo32) + lo_hi;
802 *upper = (hi_lo >> 32) + (t >> 32) + hi_hi;
803 return (t << 32) | (lo_lo & kLo32);
807 #if HWY_COMPILER_MSVC
808 #pragma intrinsic(memcpy)
809 #pragma intrinsic(memset)
813 template <
size_t kBytes,
typename From,
typename To>
815 #if HWY_COMPILER_MSVC
816 memcpy(to, from, kBytes);
818 __builtin_memcpy(to, from, kBytes);
822 template <
size_t kBytes,
typename To>
824 #if HWY_COMPILER_MSVC
825 memset(to, 0, kBytes);
827 __builtin_memset(to, 0, kBytes);
832 uint32_t bits = bf.
bits;
835 CopyBytes<4>(&bits, &f);
841 CopyBytes<4>(&f, &bits);
843 bf.
bits =
static_cast<uint16_t
>(bits >> 16);
#define HWY_RESTRICT
Definition: base.h:61
#define HWY_NORETURN
Definition: base.h:65
#define HWY_API
Definition: base.h:120
#define HWY_MAYBE_UNUSED
Definition: base.h:73
#define HWY_DLLEXPORT
Definition: highway_export.h:13
Definition: aligned_allocator.h:27
HWY_API void CopyBytes(const From *from, To *to)
Definition: base.h:814
constexpr T MantissaEnd()
Definition: base.h:631
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero64(const uint64_t x)
Definition: base.h:684
HWY_API float F32FromBF16(bfloat16_t bf)
Definition: base.h:831
HWY_API void ZeroBytes(To *to)
Definition: base.h:823
constexpr HWY_API T LimitsMin()
Definition: base.h:555
HWY_API uint64_t Mul128(uint64_t a, uint64_t b, uint64_t *HWY_RESTRICT upper)
Definition: base.h:788
HWY_API bfloat16_t BF16FromF32(float f)
Definition: base.h:839
typename detail::TypeFromSize< N >::Float FloatFromSize
Definition: base.h:521
typename RemoveConstT< T >::type RemoveConst
Definition: base.h:370
typename detail::TypeFromSize< N >::Unsigned UnsignedFromSize
Definition: base.h:517
constexpr float HighestValue< float >()
Definition: base.h:580
constexpr HWY_API T LimitsMax()
Definition: base.h:548
typename detail::TypeFromSize< N >::Signed SignedFromSize
Definition: base.h:519
constexpr T1 DivCeil(T1 a, T2 b)
Definition: base.h:664
constexpr float MantissaEnd< float >()
Definition: base.h:636
double float64_t
Definition: base.h:258
constexpr bool IsSigned< bfloat16_t >()
Definition: base.h:542
constexpr MakeUnsigned< T > MantissaMask()
Definition: base.h:624
constexpr size_t FloorLog2(TI x)
Definition: base.h:770
HWY_API size_t Num0BitsAboveMS1Bit_Nonzero32(const uint32_t x)
Definition: base.h:709
constexpr bool IsSigned< float16_t >()
Definition: base.h:538
constexpr double HighestValue< double >()
Definition: base.h:584
constexpr int MantissaBits< double >()
Definition: base.h:599
typename EnableIfT< Condition >::type EnableIf
Definition: base.h:309
constexpr HWY_API bool IsFloat()
Definition: base.h:527
static HWY_MAYBE_UNUSED bool operator>(const uint128_t &a, const uint128_t &b)
Definition: base.h:283
float float32_t
Definition: base.h:257
HWY_API size_t PopCount(uint64_t x)
Definition: base.h:743
constexpr double MantissaEnd< double >()
Definition: base.h:640
constexpr int MantissaBits()
Definition: base.h:590
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x)
Definition: base.h:674
constexpr float LowestValue< float >()
Definition: base.h:567
constexpr HWY_API bool IsSame()
Definition: base.h:322
constexpr size_t CeilLog2(TI x)
Definition: base.h:777
HWY_API size_t Num0BitsAboveMS1Bit_Nonzero64(const uint64_t x)
Definition: base.h:719
constexpr double LowestValue< double >()
Definition: base.h:571
static HWY_MAYBE_UNUSED bool operator<(const uint128_t &a, const uint128_t &b)
Definition: base.h:278
constexpr MakeSigned< T > MaxExponentField()
Definition: base.h:656
constexpr HWY_API T LowestValue()
Definition: base.h:563
constexpr HWY_API T HighestValue()
Definition: base.h:576
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize
Definition: base.h:209
constexpr HWY_API bool IsSigned()
Definition: base.h:534
typename detail::Relations< T >::Unsigned MakeUnsigned
Definition: base.h:503
typename detail::Relations< T >::Wide MakeWide
Definition: base.h:511
typename detail::Relations< T >::Float MakeFloat
Definition: base.h:507
typename detail::Relations< T >::Signed MakeSigned
Definition: base.h:505
constexpr int MantissaBits< float >()
Definition: base.h:595
constexpr MakeUnsigned< T > SignMask()
Definition: base.h:612
HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4) Abort(const char *file
HWY_DLLEXPORT HWY_NORETURN void int const char * format
Definition: base.h:848
HWY_DLLEXPORT HWY_NORETURN void int line
Definition: base.h:848
constexpr size_t RoundUpTo(size_t what, size_t align)
Definition: base.h:669
typename detail::Relations< T >::Narrow MakeNarrow
Definition: base.h:513
constexpr MakeUnsigned< T > ExponentMask()
Definition: base.h:618
constexpr int ExponentBits()
Definition: base.h:647
constexpr MakeSigned< T > MaxExponentTimes2()
Definition: base.h:606
void type
Definition: base.h:305
@ value
Definition: base.h:313
uint64_t value
Definition: base.h:272
uint64_t key
Definition: base.h:273
T type
Definition: base.h:366
T type
Definition: base.h:362
uint16_t bits
Definition: base.h:252
int16_t Signed
Definition: base.h:451
float Wide
Definition: base.h:452
uint16_t Unsigned
Definition: base.h:450
double Float
Definition: base.h:466
uint64_t Unsigned
Definition: base.h:464
int64_t Signed
Definition: base.h:465
float Narrow
Definition: base.h:467
int16_t Signed
Definition: base.h:444
float Wide
Definition: base.h:446
uint16_t Unsigned
Definition: base.h:443
uint32_t Unsigned
Definition: base.h:456
double Wide
Definition: base.h:459
float Float
Definition: base.h:458
int32_t Signed
Definition: base.h:457
uint16_t Unsigned
Definition: base.h:400
int16_t Signed
Definition: base.h:401
int32_t Wide
Definition: base.h:402
int8_t Narrow
Definition: base.h:403
uint32_t Unsigned
Definition: base.h:415
int64_t Wide
Definition: base.h:418
float Float
Definition: base.h:417
int16_t Narrow
Definition: base.h:419
int32_t Signed
Definition: base.h:416
int32_t Narrow
Definition: base.h:434
double Float
Definition: base.h:433
uint64_t Unsigned
Definition: base.h:431
int64_t Signed
Definition: base.h:432
int16_t Wide
Definition: base.h:389
int8_t Signed
Definition: base.h:388
uint8_t Unsigned
Definition: base.h:387
uint64_t Narrow
Definition: base.h:439
uint8_t Narrow
Definition: base.h:396
int16_t Signed
Definition: base.h:394
uint32_t Wide
Definition: base.h:395
uint16_t Unsigned
Definition: base.h:393
uint32_t Unsigned
Definition: base.h:407
uint64_t Wide
Definition: base.h:410
uint16_t Narrow
Definition: base.h:411
float Float
Definition: base.h:409
int32_t Signed
Definition: base.h:408
uint32_t Narrow
Definition: base.h:427
int64_t Signed
Definition: base.h:424
uint64_t Unsigned
Definition: base.h:423
double Float
Definition: base.h:425
int8_t Signed
Definition: base.h:382
uint8_t Unsigned
Definition: base.h:381
uint16_t Wide
Definition: base.h:383
int8_t Signed
Definition: base.h:475
uint8_t Unsigned
Definition: base.h:474
int16_t Signed
Definition: base.h:480
uint16_t Unsigned
Definition: base.h:479
int32_t Signed
Definition: base.h:485
uint32_t Unsigned
Definition: base.h:484
float Float
Definition: base.h:486
double Float
Definition: base.h:492
int64_t Signed
Definition: base.h:491
uint64_t Unsigned
Definition: base.h:490
uint16_t bits
Definition: base.h:247
uint64_t lo
Definition: base.h:265
uint64_t hi
Definition: base.h:266