Grok  10.0.3
highway.h
Go to the documentation of this file.
1 // Copyright 2020 Google LLC
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 // This include guard is checked by foreach_target, so avoid the usual _H_
17 // suffix to prevent copybara from renaming it. NOTE: ops/*-inl.h are included
18 // after/outside this include guard.
19 #ifndef HWY_HIGHWAY_INCLUDED
20 #define HWY_HIGHWAY_INCLUDED
21 
22 // Main header required before using vector types.
23 
24 #include "hwy/base.h"
25 #include "hwy/targets.h"
26 
27 namespace hwy {
28 
29 // API version (https://semver.org/); keep in sync with CMakeLists.txt.
30 #define HWY_MAJOR 0
31 #define HWY_MINOR 17
32 #define HWY_PATCH 0
33 
34 //------------------------------------------------------------------------------
35 // Shorthand for tags (defined in shared-inl.h) used to select overloads.
36 // Note that ScalableTag<T> is preferred over HWY_FULL, and CappedTag<T, N> over
37 // HWY_CAPPED(T, N).
38 
39 // HWY_FULL(T[,LMUL=1]) is a native vector/group. LMUL is the number of
40 // registers in the group, and is ignored on targets that do not support groups.
41 #define HWY_FULL1(T) hwy::HWY_NAMESPACE::ScalableTag<T>
42 #define HWY_FULL2(T, LMUL) \
43  hwy::HWY_NAMESPACE::ScalableTag<T, CeilLog2(HWY_MAX(0, LMUL))>
44 #define HWY_3TH_ARG(arg1, arg2, arg3, ...) arg3
45 // Workaround for MSVC grouping __VA_ARGS__ into a single argument
46 #define HWY_FULL_RECOMPOSER(args_with_paren) HWY_3TH_ARG args_with_paren
47 // Trailing comma avoids -pedantic false alarm
48 #define HWY_CHOOSE_FULL(...) \
49  HWY_FULL_RECOMPOSER((__VA_ARGS__, HWY_FULL2, HWY_FULL1, ))
50 #define HWY_FULL(...) HWY_CHOOSE_FULL(__VA_ARGS__())(__VA_ARGS__)
51 
52 // Vector of up to MAX_N lanes. It's better to use full vectors where possible.
53 #define HWY_CAPPED(T, MAX_N) hwy::HWY_NAMESPACE::CappedTag<T, MAX_N>
54 
55 //------------------------------------------------------------------------------
56 // Export user functions for static/dynamic dispatch
57 
58 // Evaluates to 0 inside a translation unit if it is generating anything but the
59 // static target (the last one if multiple targets are enabled). Used to prevent
60 // redefinitions of HWY_EXPORT. Unless foreach_target.h is included, we only
61 // compile once anyway, so this is 1 unless it is or has been included.
62 #ifndef HWY_ONCE
63 #define HWY_ONCE 1
64 #endif
65 
66 // HWY_STATIC_DISPATCH(FUNC_NAME) is the namespace-qualified FUNC_NAME for
67 // HWY_STATIC_TARGET (the only defined namespace unless HWY_TARGET_INCLUDE is
68 // defined), and can be used to deduce the return type of Choose*.
69 #if HWY_STATIC_TARGET == HWY_SCALAR
70 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SCALAR::FUNC_NAME
71 #elif HWY_STATIC_TARGET == HWY_EMU128
72 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_EMU128::FUNC_NAME
73 #elif HWY_STATIC_TARGET == HWY_RVV
74 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_RVV::FUNC_NAME
75 #elif HWY_STATIC_TARGET == HWY_WASM_EMU256
76 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM2::FUNC_NAME
77 #elif HWY_STATIC_TARGET == HWY_WASM
78 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM::FUNC_NAME
79 #elif HWY_STATIC_TARGET == HWY_NEON
80 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME
81 #elif HWY_STATIC_TARGET == HWY_SVE
82 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE::FUNC_NAME
83 #elif HWY_STATIC_TARGET == HWY_SVE2
84 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2::FUNC_NAME
85 #elif HWY_STATIC_TARGET == HWY_SVE_256
86 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE_256::FUNC_NAME
87 #elif HWY_STATIC_TARGET == HWY_SVE2_128
88 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2_128::FUNC_NAME
89 #elif HWY_STATIC_TARGET == HWY_PPC8
90 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC8::FUNC_NAME
91 #elif HWY_STATIC_TARGET == HWY_SSSE3
92 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSSE3::FUNC_NAME
93 #elif HWY_STATIC_TARGET == HWY_SSE4
94 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE4::FUNC_NAME
95 #elif HWY_STATIC_TARGET == HWY_AVX2
96 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX2::FUNC_NAME
97 #elif HWY_STATIC_TARGET == HWY_AVX3
98 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3::FUNC_NAME
99 #elif HWY_STATIC_TARGET == HWY_AVX3_DL
100 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_DL::FUNC_NAME
101 #endif
102 
103 // Dynamic dispatch declarations.
104 
105 template <typename RetType, typename... Args>
106 struct FunctionCache {
107  public:
108  typedef RetType(FunctionType)(Args...);
109 
110  // A template function that when instantiated has the same signature as the
111  // function being called. This function initializes the global cache of the
112  // current supported targets mask used for dynamic dispatch and calls the
113  // appropriate function. Since this mask used for dynamic dispatch is a
114  // global cache, all the highway exported functions, even those exposed by
115  // different modules, will be initialized after this function runs for any one
116  // of those exported functions.
117  template <FunctionType* const table[]>
118  static RetType ChooseAndCall(Args... args) {
119  // If we are running here it means we need to update the chosen target.
120  ChosenTarget& chosen_target = GetChosenTarget();
121  chosen_target.Update(SupportedTargets());
122  return (table[chosen_target.GetIndex()])(args...);
123  }
124 };
125 
126 // Deduction helper: given a function pointer, yields the FunctionCache
127 // specialization whose RetType and Args match that function's signature.
128 template <typename RetType, typename... Args>
129 FunctionCache<RetType, Args...> FunctionCacheFactory(RetType (*)(Args...)) {
130  return {};
131 }
132 
133 // HWY_CHOOSE_*(FUNC_NAME) expands to the function pointer for that target or
134 // nullptr if that target was not compiled.
135 #if HWY_TARGETS & HWY_EMU128
136 #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_EMU128::FUNC_NAME
137 #elif HWY_TARGETS & HWY_SCALAR
138 #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_SCALAR::FUNC_NAME
139 #else
140 // When HWY_SCALAR/HWY_EMU128 are not present and other targets were disabled at
141 // runtime, fall back to the baseline with HWY_STATIC_DISPATCH().
142 #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
143 #endif
144 
145 #if HWY_TARGETS & HWY_WASM_EMU256
146 #define HWY_CHOOSE_WASM2(FUNC_NAME) &N_WASM2::FUNC_NAME
147 #else
148 #define HWY_CHOOSE_WASM2(FUNC_NAME) nullptr
149 #endif
150 
151 #if HWY_TARGETS & HWY_WASM
152 #define HWY_CHOOSE_WASM(FUNC_NAME) &N_WASM::FUNC_NAME
153 #else
154 #define HWY_CHOOSE_WASM(FUNC_NAME) nullptr
155 #endif
156 
157 #if HWY_TARGETS & HWY_RVV
158 #define HWY_CHOOSE_RVV(FUNC_NAME) &N_RVV::FUNC_NAME
159 #else
160 #define HWY_CHOOSE_RVV(FUNC_NAME) nullptr
161 #endif
162 
163 #if HWY_TARGETS & HWY_NEON
164 #define HWY_CHOOSE_NEON(FUNC_NAME) &N_NEON::FUNC_NAME
165 #else
166 #define HWY_CHOOSE_NEON(FUNC_NAME) nullptr
167 #endif
168 
169 #if HWY_TARGETS & HWY_SVE
170 #define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME
171 #else
172 #define HWY_CHOOSE_SVE(FUNC_NAME) nullptr
173 #endif
174 
175 #if HWY_TARGETS & HWY_SVE2
176 #define HWY_CHOOSE_SVE2(FUNC_NAME) &N_SVE2::FUNC_NAME
177 #else
178 #define HWY_CHOOSE_SVE2(FUNC_NAME) nullptr
179 #endif
180 
181 #if HWY_TARGETS & HWY_SVE_256
182 #define HWY_CHOOSE_SVE_256(FUNC_NAME) &N_SVE_256::FUNC_NAME
183 #else
184 #define HWY_CHOOSE_SVE_256(FUNC_NAME) nullptr
185 #endif
186 
187 #if HWY_TARGETS & HWY_SVE2_128
188 #define HWY_CHOOSE_SVE2_128(FUNC_NAME) &N_SVE2_128::FUNC_NAME
189 #else
190 #define HWY_CHOOSE_SVE2_128(FUNC_NAME) nullptr
191 #endif
192 
193 #if HWY_TARGETS & HWY_PPC8
194 #define HWY_CHOOSE_PPC8(FUNC_NAME) &N_PPC8::FUNC_NAME
195 #else
196 #define HWY_CHOOSE_PPC8(FUNC_NAME) nullptr
197 #endif
198 
199 #if HWY_TARGETS & HWY_SSSE3
200 #define HWY_CHOOSE_SSSE3(FUNC_NAME) &N_SSSE3::FUNC_NAME
201 #else
202 #define HWY_CHOOSE_SSSE3(FUNC_NAME) nullptr
203 #endif
204 
205 #if HWY_TARGETS & HWY_SSE4
206 #define HWY_CHOOSE_SSE4(FUNC_NAME) &N_SSE4::FUNC_NAME
207 #else
208 #define HWY_CHOOSE_SSE4(FUNC_NAME) nullptr
209 #endif
210 
211 #if HWY_TARGETS & HWY_AVX2
212 #define HWY_CHOOSE_AVX2(FUNC_NAME) &N_AVX2::FUNC_NAME
213 #else
214 #define HWY_CHOOSE_AVX2(FUNC_NAME) nullptr
215 #endif
216 
217 #if HWY_TARGETS & HWY_AVX3
218 #define HWY_CHOOSE_AVX3(FUNC_NAME) &N_AVX3::FUNC_NAME
219 #else
220 #define HWY_CHOOSE_AVX3(FUNC_NAME) nullptr
221 #endif
222 
223 #if HWY_TARGETS & HWY_AVX3_DL
224 #define HWY_CHOOSE_AVX3_DL(FUNC_NAME) &N_AVX3_DL::FUNC_NAME
225 #else
226 #define HWY_CHOOSE_AVX3_DL(FUNC_NAME) nullptr
227 #endif
228 
229 #define HWY_DISPATCH_TABLE(FUNC_NAME) \
230  HWY_CONCAT(FUNC_NAME, HighwayDispatchTable)
231 
232 // HWY_EXPORT(FUNC_NAME); expands to a static array that is used by
233 // HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime. This
234 // static array must be defined at the same namespace level as the function
235 // it is exporting.
236 // After being exported, it can be called from other parts of the same source
237 // file using HWY_DYNAMIC_DISPATCH(), in particular from a function wrapper
238 // like in the following example:
239 //
240 // #include "hwy/highway.h"
241 // HWY_BEFORE_NAMESPACE();
242 // namespace skeleton {
243 // namespace HWY_NAMESPACE {
244 //
245 // void MyFunction(int a, char b, const char* c) { ... }
246 //
247 // // NOLINTNEXTLINE(google-readability-namespace-comments)
248 // } // namespace HWY_NAMESPACE
249 // } // namespace skeleton
250 // HWY_AFTER_NAMESPACE();
251 //
252 // namespace skeleton {
253 // HWY_EXPORT(MyFunction); // Defines the dispatch table in this scope.
254 //
255 // void MyFunction(int a, char b, const char* c) {
256 // return HWY_DYNAMIC_DISPATCH(MyFunction)(a, b, c);
257 // }
258 // } // namespace skeleton
259 //
260 
261 #if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
262 
263 // Simplified version for IDE or the dynamic dispatch case with only one target.
264 // This case still uses a table, although of a single element, to provide the
265 // same compile error conditions as with the dynamic dispatch case when multiple
266 // targets are being compiled.
267 #define HWY_EXPORT(FUNC_NAME) \
268  HWY_MAYBE_UNUSED static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const \
269  HWY_DISPATCH_TABLE(FUNC_NAME)[1] = {&HWY_STATIC_DISPATCH(FUNC_NAME)}
270 #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME)
271 
272 #else
273 
274 // Dynamic dispatch case with one entry per dynamic target plus the fallback
275 // target and the initialization wrapper.
276 #define HWY_EXPORT(FUNC_NAME) \
277  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
278  FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = { \
279  /* The first entry in the table initializes the global cache and \
280  * calls the appropriate function. */ \
281  &decltype(hwy::FunctionCacheFactory(&HWY_STATIC_DISPATCH( \
282  FUNC_NAME)))::ChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>, \
283  HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
284  HWY_CHOOSE_FALLBACK(FUNC_NAME), \
285  }
286 #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) \
287  (*(HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()]))
288 
289 #endif // HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
290 
291 // DEPRECATED names; please use HWY_HAVE_* instead.
292 #define HWY_CAP_INTEGER64 HWY_HAVE_INTEGER64
293 #define HWY_CAP_FLOAT16 HWY_HAVE_FLOAT16
294 #define HWY_CAP_FLOAT64 HWY_HAVE_FLOAT64
295 
296 } // namespace hwy
297 
298 #endif // HWY_HIGHWAY_INCLUDED
299 
300 //------------------------------------------------------------------------------
301 
302 // NOTE: the following definitions and ops/*.h depend on HWY_TARGET, so we want
303 // to include them once per target, which is ensured by the toggle check.
304 // Because ops/*.h are included under it, they do not need their own guard.
305 #if defined(HWY_HIGHWAY_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
306 #ifdef HWY_HIGHWAY_PER_TARGET
307 #undef HWY_HIGHWAY_PER_TARGET
308 #else
309 #define HWY_HIGHWAY_PER_TARGET
310 #endif
311 
312 // These define ops inside namespace hwy::HWY_NAMESPACE.
313 #if HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4
314 #include "hwy/ops/x86_128-inl.h"
315 #elif HWY_TARGET == HWY_AVX2
316 #include "hwy/ops/x86_256-inl.h"
317 #elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL
318 #include "hwy/ops/x86_512-inl.h"
319 #elif HWY_TARGET == HWY_PPC8
320 #error "PPC is not yet supported"
321 #elif HWY_TARGET == HWY_NEON
322 #include "hwy/ops/arm_neon-inl.h"
323 #elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2 || \
324  HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
325 #include "hwy/ops/arm_sve-inl.h"
326 #elif HWY_TARGET == HWY_WASM_EMU256
327 #include "hwy/ops/wasm_256-inl.h"
328 #elif HWY_TARGET == HWY_WASM
329 #include "hwy/ops/wasm_128-inl.h"
330 #elif HWY_TARGET == HWY_RVV
331 #include "hwy/ops/rvv-inl.h"
332 #elif HWY_TARGET == HWY_EMU128
333 #include "hwy/ops/emu128-inl.h"
334 #elif HWY_TARGET == HWY_SCALAR
335 #include "hwy/ops/scalar-inl.h"
336 #else
337 #pragma message("HWY_TARGET does not match any known target")
338 #endif // HWY_TARGET
339 
340 #include "hwy/ops/generic_ops-inl.h"
341 
342 #endif // HWY_HIGHWAY_PER_TARGET
Definition: aligned_allocator.h:27
FunctionCache< RetType, Args... > FunctionCacheFactory(RetType(*)(Args...))
Definition: highway.h:129
HWY_DLLEXPORT ChosenTarget & GetChosenTarget()
HWY_DLLEXPORT uint32_t SupportedTargets()
Definition: targets.h:242
size_t HWY_INLINE GetIndex() const
Definition: targets.h:265
void Update(uint32_t targets)
Definition: targets.h:246
Definition: highway.h:106
RetType() FunctionType(Args...)
Definition: highway.h:108
static RetType ChooseAndCall(Args... args)
Definition: highway.h:118