// Grok 10.0.3 - set_macros-inl.h
// (Source listing; see the generated documentation of this file.)
// Copyright 2020 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Sets macros based on HWY_TARGET.

// This include guard is toggled by foreach_target, so avoid the usual _H_
// suffix to prevent copybara from renaming it.
//
// The guard is flipped (defined <-> undefined) only when its defined-state
// equals that of HWY_TARGET_TOGGLE. Nothing else is enclosed by this #if: all
// definitions further down are evaluated on every inclusion of this header.
#if defined(HWY_SET_MACROS_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
#ifdef HWY_SET_MACROS_PER_TARGET
#undef HWY_SET_MACROS_PER_TARGET
#else
#define HWY_SET_MACROS_PER_TARGET
#endif

#endif  // HWY_SET_MACROS_PER_TARGET

#include "hwy/detect_targets.h"

// Remove any definitions left over from a previous inclusion so that the
// section for the current HWY_TARGET (below) can define them afresh.

// Namespace, alignment and vector-size macros.
#undef HWY_NAMESPACE
#undef HWY_ALIGN
#undef HWY_MAX_BYTES
#undef HWY_LANES

// Per-target feature/capability flags.
#undef HWY_HAVE_SCALABLE
#undef HWY_HAVE_INTEGER64
#undef HWY_HAVE_FLOAT16
#undef HWY_HAVE_FLOAT64
#undef HWY_MEM_OPS_MIGHT_FAULT
#undef HWY_NATIVE_FMA
#undef HWY_CAP_GE256
#undef HWY_CAP_GE512

// Compiler target-attribute string; intentionally left undefined for targets
// that do not set it.
#undef HWY_TARGET_STR

// Building blocks for the x86 target-attribute strings. Each optional group
// begins with a comma so it can be appended by string-literal concatenation,
// and becomes "" when the matching HWY_DISABLE_* macro is defined.

// PCLMUL and AES.
#if defined(HWY_DISABLE_PCLMUL_AES)
#define HWY_TARGET_STR_PCLMUL_AES ""
#else
#define HWY_TARGET_STR_PCLMUL_AES ",pclmul,aes"
#endif

// BMI, BMI2 and FMA.
#if defined(HWY_DISABLE_BMI2_FMA)
#define HWY_TARGET_STR_BMI2_FMA ""
#else
#define HWY_TARGET_STR_BMI2_FMA ",bmi,bmi2,fma"
#endif

// F16C.
#if defined(HWY_DISABLE_F16C)
#define HWY_TARGET_STR_F16C ""
#else
#define HWY_TARGET_STR_F16C ",f16c"
#endif

#define HWY_TARGET_STR_SSSE3 "sse2,ssse3"

#define HWY_TARGET_STR_SSE4 \
  HWY_TARGET_STR_SSSE3 ",sse4.1,sse4.2" HWY_TARGET_STR_PCLMUL_AES
// Include previous targets, which are the half-vectors of the next target.
#define HWY_TARGET_STR_AVX2 \
  HWY_TARGET_STR_SSE4 ",avx,avx2" HWY_TARGET_STR_BMI2_FMA HWY_TARGET_STR_F16C
#define HWY_TARGET_STR_AVX3 \
  HWY_TARGET_STR_AVX2 ",avx512f,avx512vl,avx512dq,avx512bw"

// Before include guard so we redefine HWY_TARGET_STR on each include,
// governed by the current HWY_TARGET.
//
// Exactly one section below is selected by HWY_TARGET. Each section defines
// HWY_NAMESPACE, HWY_ALIGN, HWY_MAX_BYTES, HWY_LANES(T) and the HWY_HAVE_* /
// HWY_MEM_OPS_MIGHT_FAULT / HWY_NATIVE_FMA / HWY_CAP_* flags, and optionally
// HWY_TARGET_STR.

// Target-attribute string for AVX3_DL. Uses "avx512vnni" (the AVX-512 form of
// VNNI) to match the other avx512* features listed; "avxvnni" names the
// separate VEX-encoded extension. Redefined identically on every inclusion.
#define HWY_TARGET_STR_AVX3_DL                                       \
  HWY_TARGET_STR_AVX3                                                \
  ",vpclmulqdq,avx512vbmi,avx512vbmi2,vaes,avx512vnni,avx512bitalg," \
  "avx512vpopcntdq"

//-----------------------------------------------------------------------------
// SSSE3
#if HWY_TARGET == HWY_SSSE3

#define HWY_NAMESPACE N_SSSE3
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_SSSE3

//-----------------------------------------------------------------------------
// SSE4
#elif HWY_TARGET == HWY_SSE4

#define HWY_NAMESPACE N_SSE4
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_SSE4

//-----------------------------------------------------------------------------
// AVX2
#elif HWY_TARGET == HWY_AVX2

#define HWY_NAMESPACE N_AVX2
#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1

// FMA is part of the target string only if HWY_DISABLE_BMI2_FMA is not defined.
#ifdef HWY_DISABLE_BMI2_FMA
#define HWY_NATIVE_FMA 0
#else
#define HWY_NATIVE_FMA 1
#endif

#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_AVX2

//-----------------------------------------------------------------------------
// AVX3[_DL]
#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL

#define HWY_ALIGN alignas(64)
#define HWY_MAX_BYTES 64
#define HWY_LANES(T) (64 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 1

// The two AVX3 variants differ only in namespace and target string.
#if HWY_TARGET == HWY_AVX3

#define HWY_NAMESPACE N_AVX3
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3

#elif HWY_TARGET == HWY_AVX3_DL

#define HWY_NAMESPACE N_AVX3_DL
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3_DL

#else
#error "Logic error"
#endif  // HWY_TARGET == HWY_AVX3_DL

//-----------------------------------------------------------------------------
// PPC8
#elif HWY_TARGET == HWY_PPC8

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_PPC8

#define HWY_TARGET_STR "altivec,vsx"

//-----------------------------------------------------------------------------
// NEON
#elif HWY_TARGET == HWY_NEON

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1

// float64 is reported only when HWY_ARCH_ARM_A64 is set.
#if HWY_ARCH_ARM_A64
#define HWY_HAVE_FLOAT64 1
#else
#define HWY_HAVE_FLOAT64 0
#endif

#define HWY_MEM_OPS_MIGHT_FAULT 1

// FMA is reported when HWY_ARCH_ARM_A64 is set or __ARM_VFPV4__ is defined.
#if defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
#define HWY_NATIVE_FMA 1
#else
#define HWY_NATIVE_FMA 0
#endif

#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_NEON

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

//-----------------------------------------------------------------------------
// SVE[2]
#elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE || \
    HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128

// SVE only requires lane alignment, not natural alignment of the entire vector.
#define HWY_ALIGN alignas(8)

// Value ensures MaxLanes() is the tightest possible upper bound to reduce
// overallocation. HWY_MAX_BYTES is defined per variant further below; that is
// fine because it is only expanded when HWY_LANES is used.
#define HWY_LANES(T) ((HWY_MAX_BYTES) / sizeof(T))

#define HWY_HAVE_SCALABLE 1
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

// Per-variant namespace and maximum vector size; the final #else is HWY_SVE.
#if HWY_TARGET == HWY_SVE2
#define HWY_NAMESPACE N_SVE2
#define HWY_MAX_BYTES 256
#elif HWY_TARGET == HWY_SVE_256
#define HWY_NAMESPACE N_SVE_256
#define HWY_MAX_BYTES 32
#elif HWY_TARGET == HWY_SVE2_128
#define HWY_NAMESPACE N_SVE2_128
#define HWY_MAX_BYTES 16
#else
#define HWY_NAMESPACE N_SVE
#define HWY_MAX_BYTES 256
#endif

// HWY_TARGET_STR remains undefined

//-----------------------------------------------------------------------------
// WASM
#elif HWY_TARGET == HWY_WASM

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 0
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_WASM

#define HWY_TARGET_STR "simd128"

//-----------------------------------------------------------------------------
// WASM_EMU256 (namespace N_WASM2)
#elif HWY_TARGET == HWY_WASM_EMU256

#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 0
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_WASM2

#define HWY_TARGET_STR "simd128"

//-----------------------------------------------------------------------------
// RVV
#elif HWY_TARGET == HWY_RVV

// RVV only requires lane alignment, not natural alignment of the entire vector,
// and the compiler already aligns builtin types, so nothing to do here.
#define HWY_ALIGN

// The spec requires VLEN <= 2^16 bits, so the limit is 2^16 bytes (LMUL=8).
#define HWY_MAX_BYTES 65536

// = HWY_MAX_BYTES divided by max LMUL=8 because MaxLanes includes the actual
// LMUL. This is the tightest possible upper bound.
#define HWY_LANES(T) (8192 / sizeof(T))

#define HWY_HAVE_SCALABLE 1
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

// float16 is reported only when the compiler defines __riscv_zfh.
#if defined(__riscv_zfh)
#define HWY_HAVE_FLOAT16 1
#else
#define HWY_HAVE_FLOAT16 0
#endif

#define HWY_NAMESPACE N_RVV

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
// (rv64gcv is not a valid target)

//-----------------------------------------------------------------------------
// EMU128
#elif HWY_TARGET == HWY_EMU128

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_EMU128

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

//-----------------------------------------------------------------------------
// SCALAR
#elif HWY_TARGET == HWY_SCALAR

#define HWY_ALIGN
#define HWY_MAX_BYTES 8
#define HWY_LANES(T) 1

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_SCALAR

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

#else
// Only a diagnostic message is emitted; none of the per-target macros are
// defined in this case.
#pragma message("HWY_TARGET does not match any known target")
#endif  // HWY_TARGET

// Override this to 1 in asan/msan builds, which will still fault.
// Applied after the per-target section so it takes precedence over whatever
// value that section chose. If neither HWY_IS_ASAN nor HWY_IS_MSAN is set, the
// per-target value is left untouched.
#if HWY_IS_ASAN || HWY_IS_MSAN
#undef HWY_MEM_OPS_MIGHT_FAULT
#define HWY_MEM_OPS_MIGHT_FAULT 1
#endif

// Clang <9 requires this be invoked at file scope, before any namespace.
// Usage: `HWY_BEFORE_NAMESPACE();` - the trailing static_assert exists only so
// that the invocation must be followed by a semicolon.
#undef HWY_BEFORE_NAMESPACE
#if defined(HWY_TARGET_STR)
// Pushes the current target's attribute string via HWY_PUSH_ATTRIBUTES.
#define HWY_BEFORE_NAMESPACE()        \
  HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_BEFORE_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif

// Clang <9 requires any namespaces be closed before this macro.
// Usage: `HWY_AFTER_NAMESPACE();` - the trailing static_assert exists only so
// that the invocation must be followed by a semicolon.
#undef HWY_AFTER_NAMESPACE
#if defined(HWY_TARGET_STR)
// Expands to HWY_POP_ATTRIBUTES, the counterpart of the HWY_PUSH_ATTRIBUTES
// used by HWY_BEFORE_NAMESPACE when HWY_TARGET_STR is defined.
#define HWY_AFTER_NAMESPACE() \
  HWY_POP_ATTRIBUTES          \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_AFTER_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif

// HWY_ATTR: per-function target attribute for the current target. Expands to
// nothing when the target has no HWY_TARGET_STR or the compiler lacks the
// `target` attribute.
//
// The checks are nested rather than joined with &&: a #if expression is parsed
// in full, so `defined(X) && HWY_HAS_ATTRIBUTE(target)` would be an error
// whenever HWY_HAS_ATTRIBUTE is not (yet) defined, even for targets that leave
// HWY_TARGET_STR undefined.
#undef HWY_ATTR
#if defined(HWY_TARGET_STR)
#if HWY_HAS_ATTRIBUTE(target)
#define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
#else
#define HWY_ATTR
#endif
#else
#define HWY_ATTR
#endif