Grok
10.0.3
src
lib
core
highway
hwy
ops
set_macros-inl.h
Go to the documentation of this file.
1
// Copyright 2020 Google LLC
2
// SPDX-License-Identifier: Apache-2.0
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
// http://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
16
// Sets macros based on HWY_TARGET.
17
18
// This include guard is toggled by foreach_target, so avoid the usual _H_
// suffix to prevent copybara from renaming it.
//
// The conditional only flips HWY_SET_MACROS_PER_TARGET (defined <-> undefined)
// whenever its defined-ness equals that of HWY_TARGET_TOGGLE. It does not wrap
// the remainder of the file.
#if defined(HWY_SET_MACROS_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
#ifdef HWY_SET_MACROS_PER_TARGET
#undef HWY_SET_MACROS_PER_TARGET
#else
#define HWY_SET_MACROS_PER_TARGET
#endif

#endif  // HWY_SET_MACROS_PER_TARGET
28
29
#include "hwy/detect_targets.h"
30
31
// Remove any earlier definitions of the per-target macros; the HWY_TARGET
// chain further down in this file defines them again.
#undef HWY_NAMESPACE
#undef HWY_ALIGN
#undef HWY_MAX_BYTES
#undef HWY_LANES

#undef HWY_HAVE_SCALABLE
#undef HWY_HAVE_INTEGER64
#undef HWY_HAVE_FLOAT16
#undef HWY_HAVE_FLOAT64
#undef HWY_MEM_OPS_MIGHT_FAULT
#undef HWY_NATIVE_FMA
#undef HWY_CAP_GE256
#undef HWY_CAP_GE512

#undef HWY_TARGET_STR
46
47
// Optional feature groups: each expands to an empty string when the matching
// HWY_DISABLE_* macro is defined, otherwise to a comma-prefixed feature list.
#if defined(HWY_DISABLE_PCLMUL_AES)
#define HWY_TARGET_STR_PCLMUL_AES ""
#else
#define HWY_TARGET_STR_PCLMUL_AES ",pclmul,aes"
#endif

#if defined(HWY_DISABLE_BMI2_FMA)
#define HWY_TARGET_STR_BMI2_FMA ""
#else
#define HWY_TARGET_STR_BMI2_FMA ",bmi,bmi2,fma"
#endif

#if defined(HWY_DISABLE_F16C)
#define HWY_TARGET_STR_F16C ""
#else
#define HWY_TARGET_STR_F16C ",f16c"
#endif

// Each list below is built by string-literal concatenation on top of the
// previous one.
#define HWY_TARGET_STR_SSSE3 "sse2,ssse3"

#define HWY_TARGET_STR_SSE4 \
  HWY_TARGET_STR_SSSE3 ",sse4.1,sse4.2" HWY_TARGET_STR_PCLMUL_AES
// Include previous targets, which are the half-vectors of the next target.
#define HWY_TARGET_STR_AVX2 \
  HWY_TARGET_STR_SSE4 ",avx,avx2" HWY_TARGET_STR_BMI2_FMA HWY_TARGET_STR_F16C
#define HWY_TARGET_STR_AVX3 \
  HWY_TARGET_STR_AVX2 ",avx512f,avx512vl,avx512dq,avx512bw"
74
75
// Before include guard so we redefine HWY_TARGET_STR on each include,
76
// governed by the current HWY_TARGET.
77
78
// Per-target macro definitions. Exactly one branch of the chain below is
// selected by HWY_TARGET; each branch defines the full set of macros that were
// #undef'd above (HWY_TARGET_STR is intentionally left undefined for targets
// that say so).

//-----------------------------------------------------------------------------
// SSSE3
#if HWY_TARGET == HWY_SSSE3

#define HWY_NAMESPACE N_SSSE3
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_SSSE3

//-----------------------------------------------------------------------------
// SSE4
#elif HWY_TARGET == HWY_SSE4

#define HWY_NAMESPACE N_SSE4
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_SSE4

//-----------------------------------------------------------------------------
// AVX2
#elif HWY_TARGET == HWY_AVX2

#define HWY_NAMESPACE N_AVX2
#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1

// FMA is part of the optional BMI2/FMA group in the AVX2 target string.
#ifdef HWY_DISABLE_BMI2_FMA
#define HWY_NATIVE_FMA 0
#else
#define HWY_NATIVE_FMA 1
#endif

#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_AVX2

//-----------------------------------------------------------------------------
// AVX3[_DL]
#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL

#define HWY_ALIGN alignas(64)
#define HWY_MAX_BYTES 64
#define HWY_LANES(T) (64 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 1

#if HWY_TARGET == HWY_AVX3

#define HWY_NAMESPACE N_AVX3
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3

#elif HWY_TARGET == HWY_AVX3_DL

#define HWY_NAMESPACE N_AVX3_DL
// NOTE(review): the VNNI entry previously read "avxvnni". That spelling names
// the separate VEX-encoded AVX-VNNI extension rather than the AVX-512 VNNI
// feature that belongs with the other avx512* entries here - confirm against
// the runtime CPU detection for this target.
#define HWY_TARGET_STR                                               \
  HWY_TARGET_STR_AVX3                                                \
  ",vpclmulqdq,avx512vbmi,avx512vbmi2,vaes,avx512vnni,avx512bitalg," \
  "avx512vpopcntdq"

#else
#error "Logic error"
#endif  // HWY_TARGET == HWY_AVX3 / HWY_AVX3_DL

//-----------------------------------------------------------------------------
// PPC8
#elif HWY_TARGET == HWY_PPC8

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_PPC8

#define HWY_TARGET_STR "altivec,vsx"

//-----------------------------------------------------------------------------
// NEON
#elif HWY_TARGET == HWY_NEON

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1

#if HWY_ARCH_ARM_A64
#define HWY_HAVE_FLOAT64 1
#else
#define HWY_HAVE_FLOAT64 0
#endif

#define HWY_MEM_OPS_MIGHT_FAULT 1

#if defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
#define HWY_NATIVE_FMA 1
#else
#define HWY_NATIVE_FMA 0
#endif

#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_NEON

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

//-----------------------------------------------------------------------------
// SVE[2]
#elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE || \
    HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128

// SVE only requires lane alignment, not natural alignment of the entire vector.
#define HWY_ALIGN alignas(8)

// Value ensures MaxLanes() is the tightest possible upper bound to reduce
// overallocation. HWY_MAX_BYTES is defined per SVE variant further down; that
// is fine because it is only expanded when HWY_LANES is used.
#define HWY_LANES(T) ((HWY_MAX_BYTES) / sizeof(T))

#define HWY_HAVE_SCALABLE 1
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#if HWY_TARGET == HWY_SVE2
#define HWY_NAMESPACE N_SVE2
#define HWY_MAX_BYTES 256
#elif HWY_TARGET == HWY_SVE_256
#define HWY_NAMESPACE N_SVE_256
#define HWY_MAX_BYTES 32
#elif HWY_TARGET == HWY_SVE2_128
#define HWY_NAMESPACE N_SVE2_128
#define HWY_MAX_BYTES 16
#else
#define HWY_NAMESPACE N_SVE
#define HWY_MAX_BYTES 256
#endif

// HWY_TARGET_STR remains undefined

//-----------------------------------------------------------------------------
// WASM
#elif HWY_TARGET == HWY_WASM

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 0
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_WASM

#define HWY_TARGET_STR "simd128"

//-----------------------------------------------------------------------------
// WASM_EMU256 (namespace N_WASM2)
#elif HWY_TARGET == HWY_WASM_EMU256

#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 0
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_WASM2

#define HWY_TARGET_STR "simd128"

//-----------------------------------------------------------------------------
// RVV
#elif HWY_TARGET == HWY_RVV

// RVV only requires lane alignment, not natural alignment of the entire vector,
// and the compiler already aligns builtin types, so nothing to do here.
#define HWY_ALIGN

// The spec requires VLEN <= 2^16 bits, so the limit is 2^16 bytes (LMUL=8).
#define HWY_MAX_BYTES 65536

// = HWY_MAX_BYTES divided by max LMUL=8 because MaxLanes includes the actual
// LMUL. This is the tightest possible upper bound.
#define HWY_LANES(T) (8192 / sizeof(T))

#define HWY_HAVE_SCALABLE 1
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#if defined(__riscv_zfh)
#define HWY_HAVE_FLOAT16 1
#else
#define HWY_HAVE_FLOAT16 0
#endif

#define HWY_NAMESPACE N_RVV

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
// (rv64gcv is not a valid target)

//-----------------------------------------------------------------------------
// EMU128
#elif HWY_TARGET == HWY_EMU128

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_EMU128

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

//-----------------------------------------------------------------------------
// SCALAR
#elif HWY_TARGET == HWY_SCALAR

#define HWY_ALIGN
#define HWY_MAX_BYTES 8
#define HWY_LANES(T) 1

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_SCALAR

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

#else
// Only emits a diagnostic message; none of the per-target macros are defined
// on this path.
#pragma message("HWY_TARGET does not match any known target")
#endif  // HWY_TARGET
390
391
// Override this to 1 in asan/msan builds, which will still fault.
// Replaces whatever value the selected target assigned to
// HWY_MEM_OPS_MIGHT_FAULT.
#if HWY_IS_ASAN || HWY_IS_MSAN
#undef HWY_MEM_OPS_MIGHT_FAULT
#define HWY_MEM_OPS_MIGHT_FAULT 1
#endif
396
397
// Clang <9 requires this be invoked at file scope, before any namespace.
// Both variants end in a static_assert without a semicolon so that the
// invocation itself must be followed by one.
#undef HWY_BEFORE_NAMESPACE
#if defined(HWY_TARGET_STR)
#define HWY_BEFORE_NAMESPACE()        \
  HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_BEFORE_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif
408
409
// Clang <9 requires any namespaces be closed before this macro.
// Both variants end in a static_assert without a semicolon so that the
// invocation itself must be followed by one.
#undef HWY_AFTER_NAMESPACE
#if defined(HWY_TARGET_STR)
#define HWY_AFTER_NAMESPACE() \
  HWY_POP_ATTRIBUTES          \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_AFTER_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif
420
421
// Per-function target attribute. Expands to nothing when the current target
// defines no HWY_TARGET_STR or when HWY_HAS_ATTRIBUTE(target) is false.
#undef HWY_ATTR
#if defined(HWY_TARGET_STR) && HWY_HAS_ATTRIBUTE(target)
#define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
#else
#define HWY_ATTR
#endif
detect_targets.h
Generated by
1.9.1