19 #ifndef HWY_HIGHWAY_INCLUDED
20 #define HWY_HIGHWAY_INCLUDED
// One-argument form: expands to hwy::HWY_NAMESPACE::ScalableTag<T>.
41 #define HWY_FULL1(T) hwy::HWY_NAMESPACE::ScalableTag<T>
// Two-argument form: LMUL is first clamped to at least 0 via HWY_MAX, then
// passed through CeilLog2 to form the second ScalableTag template argument.
42 #define HWY_FULL2(T, LMUL) \
43 hwy::HWY_NAMESPACE::ScalableTag<T, CeilLog2(HWY_MAX(0, LMUL))>
// Yields its third argument and discards all others.
44 #define HWY_3TH_ARG(arg1, arg2, arg3, ...) arg3
// Applies HWY_3TH_ARG to an argument list that is already parenthesized.
// NOTE(review): presumably this extra step makes the preprocessor re-split
// __VA_ARGS__ into separate arguments - confirm which compilers need it.
46 #define HWY_FULL_RECOMPOSER(args_with_paren) HWY_3TH_ARG args_with_paren
// Picks the implementation macro by argument count. The caller's arguments are
// followed by HWY_FULL2, HWY_FULL1 and an empty argument, so the third element
// is HWY_FULL1 for one caller argument and HWY_FULL2 for two. The trailing
// empty argument keeps the variadic part of HWY_3TH_ARG present in the
// one-argument case.
48 #define HWY_CHOOSE_FULL(...) \
49 HWY_FULL_RECOMPOSER((__VA_ARGS__, HWY_FULL2, HWY_FULL1, ))
// Entry point: HWY_FULL(T) resolves to HWY_FULL1(T) and HWY_FULL(T, LMUL) to
// HWY_FULL2(T, LMUL). HWY_CHOOSE_FULL produces the macro name, which is then
// invoked with the original arguments.
50 #define HWY_FULL(...) HWY_CHOOSE_FULL(__VA_ARGS__())(__VA_ARGS__)
// Expands to hwy::HWY_NAMESPACE::CappedTag<T, MAX_N>.
53 #define HWY_CAPPED(T, MAX_N) hwy::HWY_NAMESPACE::CappedTag<T, MAX_N>
// HWY_STATIC_DISPATCH(FUNC_NAME) expands to FUNC_NAME qualified with the
// namespace that corresponds to HWY_STATIC_TARGET, e.g. N_AVX2::FUNC_NAME when
// HWY_STATIC_TARGET == HWY_AVX2. The choice is made by the preprocessor, so
// exactly one definition is active per translation unit. Namespace names follow
// the target names except for HWY_WASM_EMU256, which maps to N_WASM2.
69 #if HWY_STATIC_TARGET == HWY_SCALAR
70 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SCALAR::FUNC_NAME
71 #elif HWY_STATIC_TARGET == HWY_EMU128
72 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_EMU128::FUNC_NAME
73 #elif HWY_STATIC_TARGET == HWY_RVV
74 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_RVV::FUNC_NAME
75 #elif HWY_STATIC_TARGET == HWY_WASM_EMU256
76 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM2::FUNC_NAME
77 #elif HWY_STATIC_TARGET == HWY_WASM
78 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM::FUNC_NAME
79 #elif HWY_STATIC_TARGET == HWY_NEON
80 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME
81 #elif HWY_STATIC_TARGET == HWY_SVE
82 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE::FUNC_NAME
83 #elif HWY_STATIC_TARGET == HWY_SVE2
84 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2::FUNC_NAME
85 #elif HWY_STATIC_TARGET == HWY_SVE_256
86 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE_256::FUNC_NAME
87 #elif HWY_STATIC_TARGET == HWY_SVE2_128
88 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2_128::FUNC_NAME
89 #elif HWY_STATIC_TARGET == HWY_PPC8
90 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC8::FUNC_NAME
91 #elif HWY_STATIC_TARGET == HWY_SSSE3
92 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSSE3::FUNC_NAME
93 #elif HWY_STATIC_TARGET == HWY_SSE4
94 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE4::FUNC_NAME
95 #elif HWY_STATIC_TARGET == HWY_AVX2
96 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX2::FUNC_NAME
97 #elif HWY_STATIC_TARGET == HWY_AVX3
98 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3::FUNC_NAME
99 #elif HWY_STATIC_TARGET == HWY_AVX3_DL
100 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_DL::FUNC_NAME
105 template <
typename RetType,
typename... Args>
117 template <FunctionType* const table[]>
122 return (table[chosen_target.
GetIndex()])(args...);
128 template <
typename RetType,
typename... Args>
// HWY_CHOOSE_FALLBACK(FUNC_NAME) is the address of the fallback implementation:
// the N_EMU128 version when the HWY_EMU128 bit is set in HWY_TARGETS, otherwise
// the N_SCALAR version when the HWY_SCALAR bit is set. The last definition
// takes the address of the statically dispatched function instead.
// NOTE(review): the directive that separates the last definition from the
// branches above is not visible here - presumably an #else; confirm.
135 #if HWY_TARGETS & HWY_EMU128
136 #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_EMU128::FUNC_NAME
137 #elif HWY_TARGETS & HWY_SCALAR
138 #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_SCALAR::FUNC_NAME
142 #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
// Per-target HWY_CHOOSE_<TARGET>(FUNC_NAME) macros for the WASM, RVV, NEON and
// SVE families. Each macro has two definitions: the address of that target's
// N_<TARGET>::FUNC_NAME, guarded by a test of the target's bit in HWY_TARGETS,
// and nullptr as the alternative.
// NOTE(review): the #else/#endif lines between the paired definitions are not
// visible here; confirm they are present in the real header.
145 #if HWY_TARGETS & HWY_WASM_EMU256
146 #define HWY_CHOOSE_WASM2(FUNC_NAME) &N_WASM2::FUNC_NAME
148 #define HWY_CHOOSE_WASM2(FUNC_NAME) nullptr
151 #if HWY_TARGETS & HWY_WASM
152 #define HWY_CHOOSE_WASM(FUNC_NAME) &N_WASM::FUNC_NAME
154 #define HWY_CHOOSE_WASM(FUNC_NAME) nullptr
157 #if HWY_TARGETS & HWY_RVV
158 #define HWY_CHOOSE_RVV(FUNC_NAME) &N_RVV::FUNC_NAME
160 #define HWY_CHOOSE_RVV(FUNC_NAME) nullptr
163 #if HWY_TARGETS & HWY_NEON
164 #define HWY_CHOOSE_NEON(FUNC_NAME) &N_NEON::FUNC_NAME
166 #define HWY_CHOOSE_NEON(FUNC_NAME) nullptr
169 #if HWY_TARGETS & HWY_SVE
170 #define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME
172 #define HWY_CHOOSE_SVE(FUNC_NAME) nullptr
175 #if HWY_TARGETS & HWY_SVE2
176 #define HWY_CHOOSE_SVE2(FUNC_NAME) &N_SVE2::FUNC_NAME
178 #define HWY_CHOOSE_SVE2(FUNC_NAME) nullptr
181 #if HWY_TARGETS & HWY_SVE_256
182 #define HWY_CHOOSE_SVE_256(FUNC_NAME) &N_SVE_256::FUNC_NAME
184 #define HWY_CHOOSE_SVE_256(FUNC_NAME) nullptr
187 #if HWY_TARGETS & HWY_SVE2_128
188 #define HWY_CHOOSE_SVE2_128(FUNC_NAME) &N_SVE2_128::FUNC_NAME
190 #define HWY_CHOOSE_SVE2_128(FUNC_NAME) nullptr
// HWY_CHOOSE_PPC8(FUNC_NAME): the address of N_PPC8::FUNC_NAME when the
// HWY_PPC8 bit is set in HWY_TARGETS, nullptr in the alternative definition.
// Both definitions must use the identical macro name so that HWY_CHOOSE_PPC8
// is defined whichever branch the preprocessor takes.
193 #if HWY_TARGETS & HWY_PPC8
194 #define HWY_CHOOSE_PPC8(FUNC_NAME) &N_PPC8::FUNC_NAME
196 #define HWY_CHOOSE_PPC8(FUNC_NAME) nullptr
// Per-target HWY_CHOOSE_<TARGET>(FUNC_NAME) macros for the x86 targets (SSSE3,
// SSE4, AVX2, AVX3, AVX3_DL). Each macro has two definitions: the address of
// N_<TARGET>::FUNC_NAME, guarded by a test of the target's bit in HWY_TARGETS,
// and nullptr as the alternative.
// NOTE(review): the #else/#endif lines between the paired definitions are not
// visible here; confirm they are present in the real header.
199 #if HWY_TARGETS & HWY_SSSE3
200 #define HWY_CHOOSE_SSSE3(FUNC_NAME) &N_SSSE3::FUNC_NAME
202 #define HWY_CHOOSE_SSSE3(FUNC_NAME) nullptr
205 #if HWY_TARGETS & HWY_SSE4
206 #define HWY_CHOOSE_SSE4(FUNC_NAME) &N_SSE4::FUNC_NAME
208 #define HWY_CHOOSE_SSE4(FUNC_NAME) nullptr
211 #if HWY_TARGETS & HWY_AVX2
212 #define HWY_CHOOSE_AVX2(FUNC_NAME) &N_AVX2::FUNC_NAME
214 #define HWY_CHOOSE_AVX2(FUNC_NAME) nullptr
217 #if HWY_TARGETS & HWY_AVX3
218 #define HWY_CHOOSE_AVX3(FUNC_NAME) &N_AVX3::FUNC_NAME
220 #define HWY_CHOOSE_AVX3(FUNC_NAME) nullptr
223 #if HWY_TARGETS & HWY_AVX3_DL
224 #define HWY_CHOOSE_AVX3_DL(FUNC_NAME) &N_AVX3_DL::FUNC_NAME
226 #define HWY_CHOOSE_AVX3_DL(FUNC_NAME) nullptr
// Name of the dispatch table belonging to FUNC_NAME, formed by passing
// FUNC_NAME and the suffix HighwayDispatchTable to HWY_CONCAT.
229 #define HWY_DISPATCH_TABLE(FUNC_NAME) \
230 HWY_CONCAT(FUNC_NAME, HighwayDispatchTable)
// HWY_EXPORT(FUNC_NAME) defines the dispatch table for FUNC_NAME and
// HWY_DYNAMIC_DISPATCH(FUNC_NAME) names the function to call. Two variants
// exist. The condition below holds when HWY_IDE is nonzero or HWY_TARGETS has
// at most one bit set (x & (x - 1) clears the lowest set bit).
261 #if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
// Single-target variant: the table has exactly one entry, the address of the
// statically dispatched function, and is marked HWY_MAYBE_UNUSED.
267 #define HWY_EXPORT(FUNC_NAME) \
268 HWY_MAYBE_UNUSED static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const \
269 HWY_DISPATCH_TABLE(FUNC_NAME)[1] = {&HWY_STATIC_DISPATCH(FUNC_NAME)}
// In this variant dynamic dispatch is just static dispatch; the table is not
// consulted.
270 #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME)
// Multi-target variant: the table has HWY_MAX_DYNAMIC_TARGETS + 2 entries. The
// first initializer shown is ChooseAndCall instantiated with the table itself,
// followed by HWY_CHOOSE_TARGET_LIST(FUNC_NAME) and
// HWY_CHOOSE_FALLBACK(FUNC_NAME). The HWY_DYNAMIC_DISPATCH defined after it
// indexes the table with hwy::GetChosenTarget().GetIndex() and dereferences
// the selected function pointer.
// NOTE(review): the #else that introduces this variant and the end of the
// initializer list are not visible here - confirm against the real header.
276 #define HWY_EXPORT(FUNC_NAME) \
277 static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
278 FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = { \
281 &decltype(hwy::FunctionCacheFactory(&HWY_STATIC_DISPATCH( \
282 FUNC_NAME)))::ChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>, \
283 HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
284 HWY_CHOOSE_FALLBACK(FUNC_NAME), \
286 #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) \
287 (*(HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()]))
// HWY_CAP_* names are plain aliases that expand to the corresponding
// HWY_HAVE_* macros.
// NOTE(review): presumably kept for older callers - confirm whether new code
// should use HWY_HAVE_* directly.
292 #define HWY_CAP_INTEGER64 HWY_HAVE_INTEGER64
293 #define HWY_CAP_FLOAT16 HWY_HAVE_FLOAT16
294 #define HWY_CAP_FLOAT64 HWY_HAVE_FLOAT64
// Entered only when HWY_HIGHWAY_PER_TARGET and HWY_TARGET_TOGGLE are either
// both defined or both undefined. Inside, HWY_HIGHWAY_PER_TARGET is flipped
// (undefined if it was defined, defined otherwise), so afterwards its state
// differs from that of HWY_TARGET_TOGGLE.
// NOTE(review): presumably this acts as a per-target include guard, and an
// #else separates the #undef from the #define - neither is visible here;
// confirm.
305 #if defined(HWY_HIGHWAY_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
306 #ifdef HWY_HIGHWAY_PER_TARGET
307 #undef HWY_HIGHWAY_PER_TARGET
309 #define HWY_HIGHWAY_PER_TARGET
313 #if HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4
315 #elif HWY_TARGET == HWY_AVX2
317 #elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL
319 #elif HWY_TARGET == HWY_PPC8
320 #error "PPC is not yet supported"
321 #elif HWY_TARGET == HWY_NEON
323 #elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2 || \
324 HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
326 #elif HWY_TARGET == HWY_WASM_EMU256
328 #elif HWY_TARGET == HWY_WASM
330 #elif HWY_TARGET == HWY_RVV
332 #elif HWY_TARGET == HWY_EMU128
334 #elif HWY_TARGET == HWY_SCALAR
337 #pragma message("HWY_TARGET does not match any known target")
Definition: aligned_allocator.h:27
FunctionCache< RetType, Args... > FunctionCacheFactory(RetType(*)(Args...))
Definition: highway.h:129
HWY_DLLEXPORT ChosenTarget & GetChosenTarget()
HWY_DLLEXPORT uint32_t SupportedTargets()
Definition: targets.h:242
size_t HWY_INLINE GetIndex() const
Definition: targets.h:265
void Update(uint32_t targets)
Definition: targets.h:246
Definition: highway.h:106
RetType() FunctionType(Args...)
Definition: highway.h:108
static RetType ChooseAndCall(Args... args)
Definition: highway.h:118