#include <riscv_vector.h>

template <class V>
using DFromV = typename DFromV_t<RemoveConst<V>>::type;

template <class V>
using TFromV = TFromD<DFromV<V>>;

#define HWY_RVV_IF_POW2_IN(D, min, max) \
  hwy::EnableIf<(min) <= Pow2(D()) && Pow2(D()) <= (max)>* = nullptr

template <typename T, size_t N, int kPow2>
59 #define HWY_RVV_FOREACH_B(X_MACRO, NAME, OP) \
60 X_MACRO(64, 0, 64, NAME, OP) \
61 X_MACRO(32, 0, 32, NAME, OP) \
62 X_MACRO(16, 0, 16, NAME, OP) \
63 X_MACRO(8, 0, 8, NAME, OP) \
64 X_MACRO(8, 1, 4, NAME, OP) \
65 X_MACRO(8, 2, 2, NAME, OP) \
66 X_MACRO(8, 3, 1, NAME, OP)
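// Illustrative note (added commentary, not from the original source): each
// HWY_RVV_FOREACH_B entry passes (SEW, SHIFT, MLEN) to X_MACRO, where
// MLEN = SEW >> SHIFT is the SEW/LMUL ratio that selects the mask register
// type vbool<MLEN>_t. For example, reading the table above:
//   X_MACRO(8, 3, 1, NAME, OP)    // SEW=8,  LMUL=8 (2^3) -> vbool1_t
//   X_MACRO(64, 0, 64, NAME, OP)  // SEW=64, LMUL=1       -> vbool64_t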
78 #define HWY_RVV_FOREACH_08_TRUNC(X_MACRO, BASE, CHAR, NAME, OP) \
79 X_MACRO(BASE, CHAR, 8, 16, __, mf4, mf2, mf8, -2, 32, NAME, OP) \
80 X_MACRO(BASE, CHAR, 8, 16, __, mf2, m1, mf4, -1, 16, NAME, OP) \
81 X_MACRO(BASE, CHAR, 8, 16, __, m1, m2, mf2, 0, 8, NAME, OP) \
82 X_MACRO(BASE, CHAR, 8, 16, __, m2, m4, m1, 1, 4, NAME, OP) \
83 X_MACRO(BASE, CHAR, 8, 16, __, m4, m8, m2, 2, 2, NAME, OP) \
84 X_MACRO(BASE, CHAR, 8, 16, __, m8, __, m4, 3, 1, NAME, OP)
86 #define HWY_RVV_FOREACH_16_TRUNC(X_MACRO, BASE, CHAR, NAME, OP) \
87 X_MACRO(BASE, CHAR, 16, 32, 8, mf2, m1, mf4, -1, 32, NAME, OP) \
88 X_MACRO(BASE, CHAR, 16, 32, 8, m1, m2, mf2, 0, 16, NAME, OP) \
89 X_MACRO(BASE, CHAR, 16, 32, 8, m2, m4, m1, 1, 8, NAME, OP) \
90 X_MACRO(BASE, CHAR, 16, 32, 8, m4, m8, m2, 2, 4, NAME, OP) \
91 X_MACRO(BASE, CHAR, 16, 32, 8, m8, __, m4, 3, 2, NAME, OP)
93 #define HWY_RVV_FOREACH_32_TRUNC(X_MACRO, BASE, CHAR, NAME, OP) \
94 X_MACRO(BASE, CHAR, 32, 64, 16, m1, m2, mf2, 0, 32, NAME, OP) \
95 X_MACRO(BASE, CHAR, 32, 64, 16, m2, m4, m1, 1, 16, NAME, OP) \
96 X_MACRO(BASE, CHAR, 32, 64, 16, m4, m8, m2, 2, 8, NAME, OP) \
97 X_MACRO(BASE, CHAR, 32, 64, 16, m8, __, m4, 3, 4, NAME, OP)
99 #define HWY_RVV_FOREACH_64_TRUNC(X_MACRO, BASE, CHAR, NAME, OP) \
100 X_MACRO(BASE, CHAR, 64, __, 32, m2, m4, m1, 1, 32, NAME, OP) \
101 X_MACRO(BASE, CHAR, 64, __, 32, m4, m8, m2, 2, 16, NAME, OP) \
102 X_MACRO(BASE, CHAR, 64, __, 32, m8, __, m4, 3, 8, NAME, OP)
105 #define HWY_RVV_FOREACH_08_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
106 X_MACRO(BASE, CHAR, 8, 16, __, mf4, mf2, mf8, -2, 32, NAME, OP) \
107 X_MACRO(BASE, CHAR, 8, 16, __, mf2, m1, mf4, -1, 16, NAME, OP) \
108 X_MACRO(BASE, CHAR, 8, 16, __, m1, m2, mf2, 0, 8, NAME, OP) \
109 X_MACRO(BASE, CHAR, 8, 16, __, m2, m4, m1, 1, 4, NAME, OP) \
110 X_MACRO(BASE, CHAR, 8, 16, __, m4, m8, m2, 2, 2, NAME, OP) \
111 X_MACRO(BASE, CHAR, 8, 16, __, m8, __, m4, 3, 1, NAME, OP)
113 #define HWY_RVV_FOREACH_16_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
114 X_MACRO(BASE, CHAR, 16, 32, 8, mf4, mf2, mf8, -2, 64, NAME, OP) \
115 X_MACRO(BASE, CHAR, 16, 32, 8, mf2, m1, mf4, -1, 32, NAME, OP) \
116 X_MACRO(BASE, CHAR, 16, 32, 8, m1, m2, mf2, 0, 16, NAME, OP) \
117 X_MACRO(BASE, CHAR, 16, 32, 8, m2, m4, m1, 1, 8, NAME, OP) \
118 X_MACRO(BASE, CHAR, 16, 32, 8, m4, m8, m2, 2, 4, NAME, OP) \
119 X_MACRO(BASE, CHAR, 16, 32, 8, m8, __, m4, 3, 2, NAME, OP)
121 #define HWY_RVV_FOREACH_32_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
122 X_MACRO(BASE, CHAR, 32, 64, 16, mf2, m1, mf4, -1, 64, NAME, OP) \
123 X_MACRO(BASE, CHAR, 32, 64, 16, m1, m2, mf2, 0, 32, NAME, OP) \
124 X_MACRO(BASE, CHAR, 32, 64, 16, m2, m4, m1, 1, 16, NAME, OP) \
125 X_MACRO(BASE, CHAR, 32, 64, 16, m4, m8, m2, 2, 8, NAME, OP) \
126 X_MACRO(BASE, CHAR, 32, 64, 16, m8, __, m4, 3, 4, NAME, OP)
128 #define HWY_RVV_FOREACH_64_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
129 X_MACRO(BASE, CHAR, 64, __, 32, m1, m2, mf2, 0, 64, NAME, OP) \
130 X_MACRO(BASE, CHAR, 64, __, 32, m2, m4, m1, 1, 32, NAME, OP) \
131 X_MACRO(BASE, CHAR, 64, __, 32, m4, m8, m2, 2, 16, NAME, OP) \
132 X_MACRO(BASE, CHAR, 64, __, 32, m8, __, m4, 3, 8, NAME, OP)
135 #define HWY_RVV_FOREACH_08_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
136 X_MACRO(BASE, CHAR, 8, 16, __, mf8, mf4, __, -3, 64, NAME, OP) \
137 X_MACRO(BASE, CHAR, 8, 16, __, mf4, mf2, mf8, -2, 32, NAME, OP) \
138 X_MACRO(BASE, CHAR, 8, 16, __, mf2, m1, mf4, -1, 16, NAME, OP) \
139 X_MACRO(BASE, CHAR, 8, 16, __, m1, m2, mf2, 0, 8, NAME, OP) \
140 X_MACRO(BASE, CHAR, 8, 16, __, m2, m4, m1, 1, 4, NAME, OP)
142 #define HWY_RVV_FOREACH_16_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
143 X_MACRO(BASE, CHAR, 16, 32, 8, mf4, mf2, mf8, -2, 64, NAME, OP) \
144 X_MACRO(BASE, CHAR, 16, 32, 8, mf2, m1, mf4, -1, 32, NAME, OP) \
145 X_MACRO(BASE, CHAR, 16, 32, 8, m1, m2, mf2, 0, 16, NAME, OP) \
146 X_MACRO(BASE, CHAR, 16, 32, 8, m2, m4, m1, 1, 8, NAME, OP)
148 #define HWY_RVV_FOREACH_32_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
149 X_MACRO(BASE, CHAR, 32, 64, 16, mf2, m1, mf4, -1, 64, NAME, OP) \
150 X_MACRO(BASE, CHAR, 32, 64, 16, m1, m2, mf2, 0, 32, NAME, OP) \
151 X_MACRO(BASE, CHAR, 32, 64, 16, m2, m4, m1, 1, 16, NAME, OP)
153 #define HWY_RVV_FOREACH_64_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
154 X_MACRO(BASE, CHAR, 64, __, 32, m1, m2, mf2, 0, 64, NAME, OP) \
155 X_MACRO(BASE, CHAR, 64, __, 32, m2, m4, m1, 1, 32, NAME, OP)
158 #define HWY_RVV_FOREACH_08_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
159 HWY_RVV_FOREACH_08_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
160 X_MACRO(BASE, CHAR, 8, 16, __, m4, m8, m2, 2, 2, NAME, OP)
162 #define HWY_RVV_FOREACH_16_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
163 HWY_RVV_FOREACH_16_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
164 X_MACRO(BASE, CHAR, 16, 32, 8, m4, m8, m2, 2, 4, NAME, OP)
166 #define HWY_RVV_FOREACH_32_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
167 HWY_RVV_FOREACH_32_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
168 X_MACRO(BASE, CHAR, 32, 64, 16, m4, m8, m2, 2, 8, NAME, OP)
170 #define HWY_RVV_FOREACH_64_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
171 HWY_RVV_FOREACH_64_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
172 X_MACRO(BASE, CHAR, 64, __, 32, m4, m8, m2, 2, 16, NAME, OP)
175 #define HWY_RVV_FOREACH_08_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
176 HWY_RVV_FOREACH_08_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
177 X_MACRO(BASE, CHAR, 8, 16, __, m8, __, m4, 3, 1, NAME, OP)
179 #define HWY_RVV_FOREACH_16_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
180 HWY_RVV_FOREACH_16_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
181 X_MACRO(BASE, CHAR, 16, 32, 8, m8, __, m4, 3, 2, NAME, OP)
183 #define HWY_RVV_FOREACH_32_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
184 HWY_RVV_FOREACH_32_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
185 X_MACRO(BASE, CHAR, 32, 64, 16, m8, __, m4, 3, 4, NAME, OP)
187 #define HWY_RVV_FOREACH_64_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
188 HWY_RVV_FOREACH_64_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
189 X_MACRO(BASE, CHAR, 64, __, 32, m8, __, m4, 3, 8, NAME, OP)
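// Rough reading of the LMUL groupings above (added commentary, not from the
// original source): _TRUNC lists only entries whose half LMUL (LMULH) is
// still a valid register group at the same SEW, _DEMOTE those whose
// half-width lane type at half the LMUL exists (usable as the source of a
// demotion), _LE2 keeps LMUL <= 2, _EXT keeps every entry whose double LMUL
// (LMULD) still exists (i.e. up to m4), and _ALL adds m8 as well. For 32-bit
// lanes, _ALL therefore covers mf2, m1, m2, m4 and m8.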
207 #define HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
209 #define HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
210 X_MACRO(BASE, CHAR, 16, 32, 8, mf4, mf2, mf8, -3, 64, NAME, OP)
212 #define HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
213 X_MACRO(BASE, CHAR, 32, 64, 16, mf2, m1, mf4, -2, 64, NAME, OP)
215 #define HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
216 X_MACRO(BASE, CHAR, 64, __, 32, m1, m2, mf2, -1, 64, NAME, OP)
219 #define HWY_RVV_FOREACH_08_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
220 HWY_RVV_FOREACH_08_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
221 HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
223 #define HWY_RVV_FOREACH_16_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
224 HWY_RVV_FOREACH_16_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
225 HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
227 #define HWY_RVV_FOREACH_32_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
228 HWY_RVV_FOREACH_32_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
229 HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
231 #define HWY_RVV_FOREACH_64_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
232 HWY_RVV_FOREACH_64_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
233 HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
236 #define HWY_RVV_FOREACH_08_LE2_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
237 HWY_RVV_FOREACH_08_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
238 HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
240 #define HWY_RVV_FOREACH_16_LE2_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
241 HWY_RVV_FOREACH_16_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
242 HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
244 #define HWY_RVV_FOREACH_32_LE2_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
245 HWY_RVV_FOREACH_32_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
246 HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
248 #define HWY_RVV_FOREACH_64_LE2_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
249 HWY_RVV_FOREACH_64_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
250 HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
253 #define HWY_RVV_FOREACH_08_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
254 HWY_RVV_FOREACH_08_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
255 HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
257 #define HWY_RVV_FOREACH_16_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
258 HWY_RVV_FOREACH_16_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
259 HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
261 #define HWY_RVV_FOREACH_32_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
262 HWY_RVV_FOREACH_32_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
263 HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
265 #define HWY_RVV_FOREACH_64_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
266 HWY_RVV_FOREACH_64_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
267 HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
270 #define HWY_RVV_FOREACH_08_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
271 HWY_RVV_FOREACH_08_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
272 HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
274 #define HWY_RVV_FOREACH_16_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
275 HWY_RVV_FOREACH_16_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
276 HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
278 #define HWY_RVV_FOREACH_32_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
279 HWY_RVV_FOREACH_32_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
280 HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
282 #define HWY_RVV_FOREACH_64_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
283 HWY_RVV_FOREACH_64_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
284 HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
287 #define HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS) \
288 HWY_CONCAT(HWY_RVV_FOREACH_08, LMULS)(X_MACRO, uint, u, NAME, OP)
289 #define HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
290 HWY_CONCAT(HWY_RVV_FOREACH_16, LMULS)(X_MACRO, uint, u, NAME, OP)
291 #define HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
292 HWY_CONCAT(HWY_RVV_FOREACH_32, LMULS)(X_MACRO, uint, u, NAME, OP)
293 #define HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS) \
294 HWY_CONCAT(HWY_RVV_FOREACH_64, LMULS)(X_MACRO, uint, u, NAME, OP)
297 #define HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS) \
298 HWY_CONCAT(HWY_RVV_FOREACH_08, LMULS)(X_MACRO, int, i, NAME, OP)
299 #define HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS) \
300 HWY_CONCAT(HWY_RVV_FOREACH_16, LMULS)(X_MACRO, int, i, NAME, OP)
301 #define HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS) \
302 HWY_CONCAT(HWY_RVV_FOREACH_32, LMULS)(X_MACRO, int, i, NAME, OP)
303 #define HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS) \
304 HWY_CONCAT(HWY_RVV_FOREACH_64, LMULS)(X_MACRO, int, i, NAME, OP)
#if HWY_HAVE_FLOAT16
#define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS) \
  HWY_CONCAT(HWY_RVV_FOREACH_16, LMULS)(X_MACRO, float, f, NAME, OP)
#else
#define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS)
#endif
313 #define HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP, LMULS) \
314 HWY_CONCAT(HWY_RVV_FOREACH_32, LMULS)(X_MACRO, float, f, NAME, OP)
315 #define HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP, LMULS) \
316 HWY_CONCAT(HWY_RVV_FOREACH_64, LMULS)(X_MACRO, float, f, NAME, OP)
319 #define HWY_RVV_FOREACH_UI08(X_MACRO, NAME, OP, LMULS) \
320 HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS) \
321 HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS)
323 #define HWY_RVV_FOREACH_UI16(X_MACRO, NAME, OP, LMULS) \
324 HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
325 HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS)
327 #define HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP, LMULS) \
328 HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
329 HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS)
331 #define HWY_RVV_FOREACH_UI64(X_MACRO, NAME, OP, LMULS) \
332 HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS) \
333 HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS)
335 #define HWY_RVV_FOREACH_UI3264(X_MACRO, NAME, OP, LMULS) \
336 HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP, LMULS) \
337 HWY_RVV_FOREACH_UI64(X_MACRO, NAME, OP, LMULS)
339 #define HWY_RVV_FOREACH_U163264(X_MACRO, NAME, OP, LMULS) \
340 HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
341 HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
342 HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS)
344 #define HWY_RVV_FOREACH_I163264(X_MACRO, NAME, OP, LMULS) \
345 HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS) \
346 HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS) \
347 HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS)
349 #define HWY_RVV_FOREACH_UI163264(X_MACRO, NAME, OP, LMULS) \
350 HWY_RVV_FOREACH_U163264(X_MACRO, NAME, OP, LMULS) \
351 HWY_RVV_FOREACH_I163264(X_MACRO, NAME, OP, LMULS)
353 #define HWY_RVV_FOREACH_F3264(X_MACRO, NAME, OP, LMULS) \
354 HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP, LMULS) \
355 HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP, LMULS)
358 #define HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS) \
359 HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS) \
360 HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
361 HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
362 HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS)
364 #define HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS) \
365 HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS) \
366 HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS) \
367 HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS) \
368 HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS)
370 #define HWY_RVV_FOREACH_F(X_MACRO, NAME, OP, LMULS) \
371 HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS) \
372 HWY_RVV_FOREACH_F3264(X_MACRO, NAME, OP, LMULS)
375 #define HWY_RVV_FOREACH_UI(X_MACRO, NAME, OP, LMULS) \
376 HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS) \
377 HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS)
379 #define HWY_RVV_FOREACH(X_MACRO, NAME, OP, LMULS) \
380 HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS) \
381 HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS) \
382 HWY_RVV_FOREACH_F(X_MACRO, NAME, OP, LMULS)
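// Illustrative usage sketch (added; the actual op definitions appear further
// below in this file): an op is typically defined by instantiating one of the
// HWY_RVV_RETV_* body macros over a type/LMUL set, e.g. something like
//   HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVV, Add, add, _ALL)
//   HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Add, fadd, _ALL)
// which generates one Add() overload per lane type and LMUL, each forwarding
// to the matching vadd_vv_* / vfadd_vv_* intrinsic.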
385 #define HWY_RVV_T(BASE, SEW) BASE##SEW##_t
386 #define HWY_RVV_D(BASE, SEW, N, SHIFT) Simd<HWY_RVV_T(BASE, SEW), N, SHIFT>
387 #define HWY_RVV_V(BASE, SEW, LMUL) v##BASE##SEW##LMUL##_t
388 #define HWY_RVV_M(MLEN) vbool##MLEN##_t
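// Example expansions of the type helpers above (illustrative):
//   HWY_RVV_T(uint, 32)        -> uint32_t
//   HWY_RVV_D(int, 16, N, 1)   -> Simd<int16_t, N, 1>
//   HWY_RVV_V(float, 64, m2)   -> vfloat64m2_t
//   HWY_RVV_M(8)               -> vbool8_t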
394 #define HWY_SPECIALIZE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
397 struct DFromV_t<HWY_RVV_V(BASE, SEW, LMUL)> { \
398 using Lane = HWY_RVV_T(BASE, SEW); \
399 using type = ScalableTag<Lane, SHIFT>; \
403 #undef HWY_SPECIALIZE
409 #define HWY_RVV_LANES(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
411 template <size_t N> \
412 HWY_API size_t NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d) { \
413 size_t actual = v##OP##SEW##LMUL(); \
416 if (detail::IsFull(d)) return actual; \
420 if (detail::ScaleByPower(128 / SEW, SHIFT) == 1) actual >>= 1; \
421 return HWY_MIN(actual, N); \
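// Note (added commentary): the generated Lanes(d) first queries the hardware
// maximum via the vsetvlmax-style intrinsic selected by OP, returns it
// directly for full vectors, otherwise halves it for the extra "virtual"
// fractional-LMUL case (the ScaleByPower(128 / SEW, SHIFT) == 1 check) and
// finally caps it at the compile-time bound N via HWY_MIN.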
template <size_t N, int kPow2>
436 #define HWY_RVV_AVL(SEW, SHIFT) \
437 Lanes(ScalableTag<HWY_RVV_T(uint, SEW), SHIFT>())
440 #define HWY_RVV_RETV_ARGV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
441 SHIFT, MLEN, NAME, OP) \
442 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
443 return v##OP##_v_##CHAR##SEW##LMUL(v, HWY_RVV_AVL(SEW, SHIFT)); \
447 #define HWY_RVV_RETV_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
448 SHIFT, MLEN, NAME, OP) \
449 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
450 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_T(BASE, SEW) b) { \
451 return v##OP##_##CHAR##SEW##LMUL(a, b, HWY_RVV_AVL(SEW, SHIFT)); \
455 #define HWY_RVV_RETV_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
456 SHIFT, MLEN, NAME, OP) \
457 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
458 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) { \
459 return v##OP##_vv_##CHAR##SEW##LMUL(a, b, HWY_RVV_AVL(SEW, SHIFT)); \
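// Illustrative expansion of HWY_RVV_RETV_ARGVV for (int, i, 32, ..., m1, ...)
// with NAME=Add, OP=add (assuming that pairing; see the op definitions below):
//   HWY_API vint32m1_t Add(vint32m1_t a, vint32m1_t b) {
//     return vadd_vv_i32m1(a, b, HWY_RVV_AVL(32, 0));
//   }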
463 #define HWY_RVV_RETM_ARGM(SEW, SHIFT, MLEN, NAME, OP) \
464 HWY_API HWY_RVV_M(MLEN) NAME(HWY_RVV_M(MLEN) m) { \
465 return vm##OP##_m_b##MLEN(m, ~0ull); \
472 #define HWY_RVV_SET(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
474 template <size_t N> \
475 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
476 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_T(BASE, SEW) arg) { \
477 return v##OP##_##CHAR##SEW##LMUL(arg, Lanes(d)); \
template <size_t N, int kPow2>

template <class D>
using VFromD = decltype(Set(D(), TFromD<D>()));
template <typename T, size_t N, int kPow2>
508 #define HWY_RVV_UNDEFINED(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
509 SHIFT, MLEN, NAME, OP) \
510 template <size_t N> \
511 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
512 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) ) { \
513 return v##OP##_##CHAR##SEW##LMUL(); \
517 #undef HWY_RVV_UNDEFINED
530 #define HWY_RVV_TRUNC(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
532 HWY_API HWY_RVV_V(BASE, SEW, LMULH) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
533 return v##OP##_v_##CHAR##SEW##LMUL##_##CHAR##SEW##LMULH(v); \
539 #define HWY_RVV_EXT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
541 template <size_t N> \
542 HWY_API HWY_RVV_V(BASE, SEW, LMULD) \
543 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT + 1) , \
544 HWY_RVV_V(BASE, SEW, LMUL) v) { \
545 return v##OP##_v_##CHAR##SEW##LMUL##_##CHAR##SEW##LMULD(v); \
552 #define HWY_RVV_EXT_VIRT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
553 SHIFT, MLEN, NAME, OP) \
554 template <size_t N> \
555 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
556 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT + 1) , \
557 HWY_RVV_V(BASE, SEW, LMUL) v) { \
561 #undef HWY_RVV_EXT_VIRT
567 #define HWY_RVV_CAST_U8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
568 SHIFT, MLEN, NAME, OP) \
569 template <typename T, size_t N> \
570 HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> , \
571 vuint8##LMUL##_t v) { \
574 template <size_t N> \
575 HWY_API vuint8##LMUL##_t BitCastFromByte( \
576 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMUL##_t v) { \
581 #define HWY_RVV_CAST_I8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
582 SHIFT, MLEN, NAME, OP) \
583 template <typename T, size_t N> \
584 HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> , \
585 vint8##LMUL##_t v) { \
586 return vreinterpret_v_i8##LMUL##_u8##LMUL(v); \
588 template <size_t N> \
589 HWY_API vint8##LMUL##_t BitCastFromByte( \
590 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMUL##_t v) { \
591 return vreinterpret_v_u8##LMUL##_i8##LMUL(v); \
596 #define HWY_RVV_CAST_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
598 template <typename T, size_t N> \
599 HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> , \
600 HWY_RVV_V(BASE, SEW, LMUL) v) { \
601 return v##OP##_v_##CHAR##SEW##LMUL##_u8##LMUL(v); \
603 template <size_t N> \
604 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
605 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMUL##_t v) { \
606 return v##OP##_v_u8##LMUL##_##CHAR##SEW##LMUL(v); \
610 #define HWY_RVV_CAST_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
611 SHIFT, MLEN, NAME, OP) \
612 template <typename T, size_t N> \
613 HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> , \
614 HWY_RVV_V(BASE, SEW, LMUL) v) { \
615 return v##OP##_v_u##SEW##LMUL##_u8##LMUL( \
616 v##OP##_v_##CHAR##SEW##LMUL##_u##SEW##LMUL(v)); \
618 template <size_t N> \
619 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
620 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMUL##_t v) { \
621 return v##OP##_v_u##SEW##LMUL##_##CHAR##SEW##LMUL( \
622 v##OP##_v_u8##LMUL##_u##SEW##LMUL(v)); \
626 #define HWY_RVV_CAST_VIRT_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
627 SHIFT, MLEN, NAME, OP) \
628 template <typename T, size_t N> \
629 HWY_API vuint8##LMULH##_t BitCastToByte(Simd<T, N, SHIFT> , \
630 HWY_RVV_V(BASE, SEW, LMUL) v) { \
631 return detail::Trunc(v##OP##_v_##CHAR##SEW##LMUL##_u8##LMUL(v)); \
633 template <size_t N> \
634 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
635 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMULH##_t v) { \
636 HWY_RVV_D(uint, 8, N, SHIFT + 1) d2; \
637 const vuint8##LMUL##_t v2 = detail::Ext(d2, v); \
638 return v##OP##_v_u8##LMUL##_##CHAR##SEW##LMUL(v2); \
642 #define HWY_RVV_CAST_VIRT_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
643 SHIFT, MLEN, NAME, OP) \
644 template <typename T, size_t N> \
645 HWY_API vuint8##LMULH##_t BitCastToByte(Simd<T, N, SHIFT> , \
646 HWY_RVV_V(BASE, SEW, LMUL) v) { \
647 return detail::Trunc(v##OP##_v_u##SEW##LMUL##_u8##LMUL( \
648 v##OP##_v_##CHAR##SEW##LMUL##_u##SEW##LMUL(v))); \
650 template <size_t N> \
651 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
652 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMULH##_t v) { \
653 HWY_RVV_D(uint, 8, N, SHIFT + 1) d2; \
654 const vuint8##LMUL##_t v2 = detail::Ext(d2, v); \
655 return v##OP##_v_u##SEW##LMUL##_##CHAR##SEW##LMUL( \
656 v##OP##_v_u8##LMUL##_u##SEW##LMUL(v2)); \
668 #undef HWY_RVV_CAST_U8
669 #undef HWY_RVV_CAST_I8
670 #undef HWY_RVV_CAST_U
671 #undef HWY_RVV_CAST_IF
672 #undef HWY_RVV_CAST_VIRT_U
673 #undef HWY_RVV_CAST_VIRT_IF
template <size_t N, int kPow2>

template <class D, class FromV>

template <class V, class DU = RebindToUnsigned<DFromV<V>>>
701 #define HWY_RVV_IOTA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
703 template <size_t N> \
704 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d) { \
705 return v##OP##_##CHAR##SEW##LMUL(Lanes(d)); \
template <class D, class DU = RebindToUnsigned<D>>
template <class V, HWY_IF_FLOAT_V(V)>

template <class V, HWY_IF_FLOAT_V(V)>
  using DF = DFromV<V>;
  using DU = RebindToUnsigned<DF>;

template <class V, HWY_IF_FLOAT_V(V)>
  using DF = DFromV<V>;
  using DU = RebindToUnsigned<DF>;

template <class V, HWY_IF_FLOAT_V(V)>
  using DF = DFromV<V>;
  using DU = RebindToUnsigned<DF>;

  return And(Not(not_a), b);

  return Or(o1, Or(o2, o3));

  return Or(o, And(a1, a2));
848 #define HWY_RVV_SHIFT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
850 template <int kBits> \
851 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
852 return v##OP##_vx_##CHAR##SEW##LMUL(v, kBits, HWY_RVV_AVL(SEW, SHIFT)); \
854 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
855 NAME##Same(HWY_RVV_V(BASE, SEW, LMUL) v, int bits) { \
856 return v##OP##_vx_##CHAR##SEW##LMUL(v, static_cast<uint8_t>(bits), \
857 HWY_RVV_AVL(SEW, SHIFT)); \
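// Illustrative expansion for unsigned 32-bit at LMUL=1 with NAME=ShiftLeft,
// OP=sll (assuming that pairing):
//   template <int kBits>
//   HWY_API vuint32m1_t ShiftLeft(vuint32m1_t v) {
//     return vsll_vx_u32m1(v, kBits, HWY_RVV_AVL(32, 0));
//   }
// ShiftLeftSame takes the shift count as a runtime int instead of a template
// argument.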
  using VU16 = VFromD<decltype(du16)>;

  const VU16 vFDB97531 = ShiftRight<8>(BitCast(du16, v));
  const VU16 vECA86420 = detail::AndS(BitCast(du16, v), 0xFF);
  const VU16 sFE_DC_BA_98_76_54_32_10 = Add(vFDB97531, vECA86420);

  const VU16 szz_FE_zz_BA_zz_76_zz_32 =
      BitCast(du16, ShiftRight<16>(BitCast(du32, sFE_DC_BA_98_76_54_32_10)));
  const VU16 sxx_FC_xx_B8_xx_74_xx_30 =
      Add(sFE_DC_BA_98_76_54_32_10, szz_FE_zz_BA_zz_76_zz_32);
  const VU16 szz_zz_xx_FC_zz_zz_xx_74 =
      BitCast(du16, ShiftRight<32>(BitCast(du64, sxx_FC_xx_B8_xx_74_xx_30)));
  const VU16 sxx_xx_xx_F8_xx_xx_xx_70 =
      Add(sxx_FC_xx_B8_xx_74_xx_30, szz_zz_xx_FC_zz_zz_xx_74);
  return detail::AndS(BitCast(du64, sxx_xx_xx_F8_xx_xx_xx_70), 0xFFFFull);
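// Added commentary: the block above computes sums of eight bytes with a tree
// of pairwise additions. The letters in the variable names are byte indices
// within a 64-bit group: adjacent u8 lanes are first summed into u16 lanes
// (F+E, D+C, ...), then the u16 partial sums are combined via 16-bit and
// 32-bit shifts of wider bitcasts, and the final AndS keeps the low 16 bits
// of each u64 lane, which hold the sum of its eight bytes.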
template <int kBits, class V>
  constexpr size_t kSizeInBits = sizeof(TFromV<V>) * 8;
  static_assert(0 <= kBits && kBits < kSizeInBits, "Invalid shift count");
  if (kBits == 0) return v;
  return Or(ShiftRight<kBits>(v), ShiftLeft<kSizeInBits - kBits>(v));
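// Example (added): for 32-bit lanes, RotateRight<8>(v) evaluates to
// Or(ShiftRight<8>(v), ShiftLeft<24>(v)), i.e. a lane-wise rotation. kBits==0
// is special-cased above because ShiftLeft<kSizeInBits> would be out of range.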
903 #define HWY_RVV_SHIFT_VV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
904 SHIFT, MLEN, NAME, OP) \
905 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
906 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, LMUL) bits) { \
907 return v##OP##_vv_##CHAR##SEW##LMUL(v, bits, HWY_RVV_AVL(SEW, SHIFT)); \
912 #define HWY_RVV_SHIFT_II(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
913 SHIFT, MLEN, NAME, OP) \
914 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
915 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, LMUL) bits) { \
916 return v##OP##_vv_##CHAR##SEW##LMUL(v, detail::BitCastToUnsigned(bits), \
917 HWY_RVV_AVL(SEW, SHIFT)); \
927 #undef HWY_RVV_SHIFT_II
928 #undef HWY_RVV_SHIFT_VV
992 #define HWY_RVV_FMA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
994 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
995 NAME(HWY_RVV_V(BASE, SEW, LMUL) mul, HWY_RVV_V(BASE, SEW, LMUL) x, \
996 HWY_RVV_V(BASE, SEW, LMUL) add) { \
997 return v##OP##_vv_##CHAR##SEW##LMUL(add, mul, x, HWY_RVV_AVL(SEW, SHIFT)); \
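// Note on argument order (added commentary): NAME(mul, x, add) forwards as
// (add, mul, x) because the underlying vfmacc-style intrinsics take the
// accumulator first. Illustrative expansion for f32/m1 with NAME=MulAdd,
// OP=fmacc (assuming that pairing):
//   HWY_API vfloat32m1_t MulAdd(vfloat32m1_t mul, vfloat32m1_t x,
//                               vfloat32m1_t add) {
//     return vfmacc_vv_f32m1(add, mul, x, HWY_RVV_AVL(32, 0));
//   }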
1020 #define HWY_RVV_RETM_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1021 SHIFT, MLEN, NAME, OP) \
1022 HWY_API HWY_RVV_M(MLEN) \
1023 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) { \
1024 return v##OP##_vv_##CHAR##SEW##LMUL##_b##MLEN(a, b, \
1025 HWY_RVV_AVL(SEW, SHIFT)); \
1029 #define HWY_RVV_RETM_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1030 SHIFT, MLEN, NAME, OP) \
1031 HWY_API HWY_RVV_M(MLEN) \
1032 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_T(BASE, SEW) b) { \
1033 return v##OP##_##CHAR##SEW##LMUL##_b##MLEN(a, b, HWY_RVV_AVL(SEW, SHIFT)); \
1068 #undef HWY_RVV_RETM_ARGVV
1069 #undef HWY_RVV_RETM_ARGVS
HWY_API auto Ge(const V a, const V b) -> decltype(Le(a, b)) {

HWY_API auto Gt(const V a, const V b) -> decltype(Lt(a, b)) {

  return detail::NeS(And(a, bit), 0);
1096 #define HWY_RVV_RETM_ARGMM(SEW, SHIFT, MLEN, NAME, OP) \
1097 HWY_API HWY_RVV_M(MLEN) NAME(HWY_RVV_M(MLEN) a, HWY_RVV_M(MLEN) b) { \
1098 return vm##OP##_mm_b##MLEN(b, a, HWY_RVV_AVL(SEW, SHIFT)); \
1112 #undef HWY_RVV_RETM_ARGMM
1115 #define HWY_RVV_IF_THEN_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1116 SHIFT, MLEN, NAME, OP) \
1117 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1118 NAME(HWY_RVV_M(MLEN) m, HWY_RVV_V(BASE, SEW, LMUL) yes, \
1119 HWY_RVV_V(BASE, SEW, LMUL) no) { \
1120 return v##OP##_vvm_##CHAR##SEW##LMUL(m, no, yes, HWY_RVV_AVL(SEW, SHIFT)); \
1125 #undef HWY_RVV_IF_THEN_ELSE
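// Added note: the vmerge-style intrinsic selects its second operand where the
// mask is false and its third where it is true, hence the (m, no, yes)
// argument order above. Illustrative expansion for i32/m1 (MLEN=32):
//   HWY_API vint32m1_t IfThenElse(vbool32_t m, vint32m1_t yes, vint32m1_t no) {
//     return vmerge_vvm_i32m1(m, no, yes, HWY_RVV_AVL(32, 0));
//   }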
template <class M, class V>
1135 #define HWY_RVV_IF_THEN_ZERO_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, \
1136 LMULH, SHIFT, MLEN, NAME, OP) \
1137 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1138 NAME(HWY_RVV_M(MLEN) m, HWY_RVV_V(BASE, SEW, LMUL) no) { \
1139 return v##OP##_##CHAR##SEW##LMUL(m, no, 0, HWY_RVV_AVL(SEW, SHIFT)); \
1145 #undef HWY_RVV_IF_THEN_ZERO_ELSE
  return detail::NeS(v, 0);

template <class D, typename MFrom>
1167 #define HWY_RVV_VEC_FROM_MASK(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1168 SHIFT, MLEN, NAME, OP) \
1169 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1170 NAME(HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_M(MLEN) m) { \
1171 return v##OP##_##CHAR##SEW##LMUL##_m(m, v0, v0, 1, \
1172 HWY_RVV_AVL(SEW, SHIFT)); \
1176 #undef HWY_RVV_VEC_FROM_MASK
template <class D, HWY_IF_NOT_FLOAT_D(D)>
  return detail::SubS(Zero(d), mask);

template <class D, HWY_IF_FLOAT_D(D)>

  static_assert(IsSigned<TFromV<V>>(), "Only works for signed/float");
1222 #define HWY_RVV_FIND_FIRST_TRUE(SEW, SHIFT, MLEN, NAME, OP) \
1223 template <class D> \
1224 HWY_API intptr_t FindFirstTrue(D d, HWY_RVV_M(MLEN) m) { \
1225 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
1226 return vfirst_m_b##MLEN(m, Lanes(d)); \
1230 #undef HWY_RVV_FIND_FIRST_TRUE
1240 #define HWY_RVV_ALL_TRUE(SEW, SHIFT, MLEN, NAME, OP) \
1241 template <class D> \
1242 HWY_API bool AllTrue(D d, HWY_RVV_M(MLEN) m) { \
1243 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
1244 return AllFalse(d, vmnot_m_b##MLEN(m, Lanes(d))); \
1248 #undef HWY_RVV_ALL_TRUE
1252 #define HWY_RVV_COUNT_TRUE(SEW, SHIFT, MLEN, NAME, OP) \
1253 template <class D> \
1254 HWY_API size_t CountTrue(D d, HWY_RVV_M(MLEN) m) { \
1255 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
1256 return vcpop_m_b##MLEN(m, Lanes(d)); \
1260 #undef HWY_RVV_COUNT_TRUE
1266 #define HWY_RVV_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1268 template <size_t N> \
1269 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1270 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1271 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1272 return v##OP##SEW##_v_##CHAR##SEW##LMUL(p, Lanes(d)); \
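// Illustrative expansion for f32/m1 with NAME=Load, OP=le (assuming that
// pairing): Load(d, p) becomes vle32_v_f32m1(p, Lanes(d)), a unit-stride load
// of Lanes(d) elements starting at p.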
template <size_t N, int kPow2>

template <size_t N, int kPow2>
1302 #define HWY_RVV_MASKED_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1303 SHIFT, MLEN, NAME, OP) \
1304 template <size_t N> \
1305 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1306 NAME(HWY_RVV_M(MLEN) m, HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1307 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1308 return v##OP##SEW##_v_##CHAR##SEW##LMUL##_m(m, Zero(d), p, Lanes(d)); \
1311 #undef HWY_RVV_MASKED_LOAD
1315 #define HWY_RVV_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1317 template <size_t N> \
1318 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, \
1319 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1320 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1321 return v##OP##SEW##_v_##CHAR##SEW##LMUL(p, v, Lanes(d)); \
1324 #undef HWY_RVV_STORE
1328 #define HWY_RVV_BLENDED_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1329 SHIFT, MLEN, NAME, OP) \
1330 template <size_t N> \
1331 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_M(MLEN) m, \
1332 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1333 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1334 return v##OP##SEW##_v_##CHAR##SEW##LMUL##_m(m, p, v, Lanes(d)); \
1337 #undef HWY_RVV_BLENDED_STORE
1341 #define HWY_RVV_STOREN(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1343 template <size_t N> \
1344 HWY_API void NAME(size_t count, HWY_RVV_V(BASE, SEW, LMUL) v, \
1345 HWY_RVV_D(BASE, SEW, N, SHIFT) , \
1346 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1347 return v##OP##SEW##_v_##CHAR##SEW##LMUL(p, v, count); \
1350 #undef HWY_RVV_STOREN
template <class V, class D>

template <class V, class D, typename T>
1370 #define HWY_RVV_SCATTER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1371 SHIFT, MLEN, NAME, OP) \
1372 template <size_t N> \
1373 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, \
1374 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1375 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base, \
1376 HWY_RVV_V(int, SEW, LMUL) offset) { \
1377 return v##OP##ei##SEW##_v_##CHAR##SEW##LMUL( \
1378 base, detail::BitCastToUnsigned(offset), v, Lanes(d)); \
1381 #undef HWY_RVV_SCATTER
template <class D, HWY_IF_LANE_SIZE_D(D, 4)>

template <class D, HWY_IF_LANE_SIZE_D(D, 8)>
                  const VFromD<RebindToSigned<D>> index) {
1399 #define HWY_RVV_GATHER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1401 template <size_t N> \
1402 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1403 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1404 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base, \
1405 HWY_RVV_V(int, SEW, LMUL) offset) { \
1406 return v##OP##ei##SEW##_v_##CHAR##SEW##LMUL( \
1407 base, detail::BitCastToUnsigned(offset), Lanes(d)); \
1410 #undef HWY_RVV_GATHER
template <class D, HWY_IF_LANE_SIZE_D(D, 4)>

template <class D, HWY_IF_LANE_SIZE_D(D, 8)>
                  const VFromD<RebindToSigned<D>> index) {
1429 #ifdef HWY_NATIVE_LOAD_STORE_INTERLEAVED
1430 #undef HWY_NATIVE_LOAD_STORE_INTERLEAVED
1432 #define HWY_NATIVE_LOAD_STORE_INTERLEAVED
1435 #define HWY_RVV_LOAD2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1437 template <size_t N> \
1438 HWY_API void NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1439 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned, \
1440 HWY_RVV_V(BASE, SEW, LMUL) & v0, \
1441 HWY_RVV_V(BASE, SEW, LMUL) & v1) { \
1442 v##OP##e##SEW##_v_##CHAR##SEW##LMUL(&v0, &v1, unaligned, Lanes(d)); \
1446 #undef HWY_RVV_LOAD2
1450 #define HWY_RVV_LOAD3(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1452 template <size_t N> \
1453 HWY_API void NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1454 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned, \
1455 HWY_RVV_V(BASE, SEW, LMUL) & v0, \
1456 HWY_RVV_V(BASE, SEW, LMUL) & v1, \
1457 HWY_RVV_V(BASE, SEW, LMUL) & v2) { \
1458 v##OP##e##SEW##_v_##CHAR##SEW##LMUL(&v0, &v1, &v2, unaligned, Lanes(d)); \
1462 #undef HWY_RVV_LOAD3
1466 #define HWY_RVV_LOAD4(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1468 template <size_t N> \
1469 HWY_API void NAME( \
1470 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1471 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT aligned, \
1472 HWY_RVV_V(BASE, SEW, LMUL) & v0, HWY_RVV_V(BASE, SEW, LMUL) & v1, \
1473 HWY_RVV_V(BASE, SEW, LMUL) & v2, HWY_RVV_V(BASE, SEW, LMUL) & v3) { \
1474 v##OP##e##SEW##_v_##CHAR##SEW##LMUL(&v0, &v1, &v2, &v3, aligned, \
1479 #undef HWY_RVV_LOAD4
1483 #define HWY_RVV_STORE2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1485 template <size_t N> \
1486 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v0, \
1487 HWY_RVV_V(BASE, SEW, LMUL) v1, \
1488 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1489 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned) { \
1490 v##OP##e##SEW##_v_##CHAR##SEW##LMUL(unaligned, v0, v1, Lanes(d)); \
1494 #undef HWY_RVV_STORE2
1498 #define HWY_RVV_STORE3(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1500 template <size_t N> \
1501 HWY_API void NAME( \
1502 HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1, \
1503 HWY_RVV_V(BASE, SEW, LMUL) v2, HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1504 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned) { \
1505 v##OP##e##SEW##_v_##CHAR##SEW##LMUL(unaligned, v0, v1, v2, Lanes(d)); \
1509 #undef HWY_RVV_STORE3
1513 #define HWY_RVV_STORE4(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1515 template <size_t N> \
1516 HWY_API void NAME( \
1517 HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1, \
1518 HWY_RVV_V(BASE, SEW, LMUL) v2, HWY_RVV_V(BASE, SEW, LMUL) v3, \
1519 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1520 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT aligned) { \
1521 v##OP##e##SEW##_v_##CHAR##SEW##LMUL(aligned, v0, v1, v2, v3, Lanes(d)); \
1525 #undef HWY_RVV_STORE4
1532 #define HWY_RVV_PROMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1533 SHIFT, MLEN, NAME, OP) \
1534 template <size_t N> \
1535 HWY_API HWY_RVV_V(BASE, SEWD, LMULD) NAME( \
1536 HWY_RVV_D(BASE, SEWD, N, SHIFT + 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1537 return OP##CHAR##SEWD##LMULD(v, Lanes(d)); \
1548 #undef HWY_RVV_PROMOTE
1552 #define HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, LMUL, LMUL_IN, \
1554 template <size_t N> \
1555 HWY_API HWY_RVV_V(BASE, BITS, LMUL) \
1556 PromoteTo(HWY_RVV_D(BASE, BITS, N, SHIFT + ADD) d, \
1557 HWY_RVV_V(BASE_IN, BITS_IN, LMUL_IN) v) { \
1558 return OP##CHAR##BITS##LMUL(v, Lanes(d)); \
1561 #define HWY_RVV_PROMOTE_X2(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN) \
1562 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf2, -2, 1) \
1563 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf2, -1, 1) \
1564 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, m1, 0, 1) \
1565 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, m2, 1, 1) \
1566 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m4, 2, 1)
1568 #define HWY_RVV_PROMOTE_X4(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN) \
1569 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, mf2, mf8, -3, 2) \
1570 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf4, -2, 2) \
1571 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, mf2, -1, 2) \
1572 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, m1, 0, 2) \
1573 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m2, 1, 2)
1581 #undef HWY_RVV_PROMOTE_X4
1582 #undef HWY_RVV_PROMOTE_X2
1583 #undef HWY_RVV_PROMOTE
template <size_t N, int kPow2>

template <size_t N, int kPow2>

template <size_t N, int kPow2>

template <size_t N, int kPow2>
  const Rebind<uint16_t, decltype(d)> du16;
1619 #define HWY_RVV_DEMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1621 template <size_t N> \
1622 HWY_API HWY_RVV_V(BASE, SEWH, LMULH) NAME( \
1623 HWY_RVV_D(BASE, SEWH, N, SHIFT - 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1624 return OP##CHAR##SEWH##LMULH(v, 0, Lanes(d)); \
1626 template <size_t N> \
1627 HWY_API HWY_RVV_V(BASE, SEWH, LMULH) NAME##Shr16( \
1628 HWY_RVV_D(BASE, SEWH, N, SHIFT - 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1629 return OP##CHAR##SEWH##LMULH(v, 16, Lanes(d)); \
1639 #define HWY_RVV_DEMOTE_I_TO_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1640 SHIFT, MLEN, NAME, OP) \
1641 template <size_t N> \
1642 HWY_API HWY_RVV_V(uint, SEWH, LMULH) NAME( \
1643 HWY_RVV_D(uint, SEWH, N, SHIFT - 1) d, HWY_RVV_V(int, SEW, LMUL) v) { \
1645 return detail::DemoteTo(d, detail::BitCastToUnsigned(detail::MaxS(v, 0))); \
1649 #undef HWY_RVV_DEMOTE_I_TO_U
  return vnclipu_wx_u8mf8(vnclipu_wx_u16mf4(v, 0, avl), 0, avl);

  return vnclipu_wx_u8mf4(vnclipu_wx_u16mf2(v, 0, avl), 0, avl);

  return vnclipu_wx_u8mf2(vnclipu_wx_u16m1(v, 0, avl), 0, avl);

  return vnclipu_wx_u8m1(vnclipu_wx_u16m2(v, 0, avl), 0, avl);

  return vnclipu_wx_u8m2(vnclipu_wx_u16m4(v, 0, avl), 0, avl);
1719 #undef HWY_RVV_DEMOTE
1724 #define HWY_RVV_DEMOTE_F(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1725 SHIFT, MLEN, NAME, OP) \
1726 template <size_t N> \
1727 HWY_API HWY_RVV_V(BASE, SEWH, LMULH) NAME( \
1728 HWY_RVV_D(BASE, SEWH, N, SHIFT - 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1729 return OP##SEWH##LMULH(v, Lanes(d)); \
1732 #if HWY_HAVE_FLOAT16
1738 #undef HWY_RVV_DEMOTE_F
  return vfncvt_rtz_x_f_w_i32mf2(v, Lanes(d));

  return vfncvt_rtz_x_f_w_i32mf2(v, Lanes(d));

  return vfncvt_rtz_x_f_w_i32m1(v, Lanes(d));

  return vfncvt_rtz_x_f_w_i32m2(v, Lanes(d));

  return vfncvt_rtz_x_f_w_i32m4(v, Lanes(d));
template <size_t N, int kPow2>
  const Rebind<uint32_t, decltype(d)> du32;
  return detail::DemoteToShr16(du16, BitCast(du32, v));
1772 #define HWY_RVV_CONVERT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1773 SHIFT, MLEN, NAME, OP) \
1774 template <size_t N> \
1775 HWY_API HWY_RVV_V(BASE, SEW, LMUL) ConvertTo( \
1776 HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(int, SEW, LMUL) v) { \
1777 return vfcvt_f_x_v_f##SEW##LMUL(v, Lanes(d)); \
1780 template <size_t N> \
1781 HWY_API HWY_RVV_V(int, SEW, LMUL) ConvertTo(HWY_RVV_D(int, SEW, N, SHIFT) d, \
1782 HWY_RVV_V(BASE, SEW, LMUL) v) { \
1783 return vfcvt_rtz_x_f_v_i##SEW##LMUL(v, Lanes(d)); \
1787 #undef HWY_RVV_CONVERT
1790 #define HWY_RVV_NEAREST(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1791 SHIFT, MLEN, NAME, OP) \
1792 HWY_API HWY_RVV_V(int, SEW, LMUL) NearestInt(HWY_RVV_V(BASE, SEW, LMUL) v) { \
1793 return vfcvt_x_f_v_i##SEW##LMUL(v, HWY_RVV_AVL(SEW, SHIFT)); \
1796 #undef HWY_RVV_NEAREST
template <typename T, size_t N, int kPow2>
  size_t lpb = 16 / sizeof(T);
  if (kPow2 >= 0) return lpb;

template <class D, class V>

template <size_t kLanes, class D>
  return LtS(BitCast(di, idx_mod), static_cast<TFromD<decltype(di)>>(kLanes));
1831 #define HWY_RVV_SLIDE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1833 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1834 NAME(HWY_RVV_V(BASE, SEW, LMUL) dst, HWY_RVV_V(BASE, SEW, LMUL) src, \
1836 return v##OP##_vx_##CHAR##SEW##LMUL(dst, src, lanes, \
1837 HWY_RVV_AVL(SEW, SHIFT)); \
1843 #undef HWY_RVV_SLIDE
template <class D, class V>

template <class D, class V>
  return detail::SlideUp(lo, hi, Lanes(d) / 2);

template <class D, class V>
  const auto lo_down = detail::SlideDown(lo, lo, Lanes(d) / 2);

template <class D, class V>
  const auto hi_up = detail::SlideUp(hi, hi, Lanes(d) / 2);
  const auto lo_down = detail::SlideDown(lo, lo, Lanes(d) / 2);

template <class D2, class V>
  return detail::SlideUp(detail::Ext(d2, lo), detail::Ext(d2, hi),

template <class D2, class V>

template <class DH, hwy::EnableIf<detail::IsSupportedLMUL(DH())>* = nullptr>

template <class DH, class V,
1935 #define HWY_RVV_SLIDE1(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1937 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
1938 return v##OP##_##CHAR##SEW##LMUL(v, 0, HWY_RVV_AVL(SEW, SHIFT)); \
1945 #undef HWY_RVV_SLIDE1
1950 #define HWY_RVV_GET_LANE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1951 SHIFT, MLEN, NAME, OP) \
1952 HWY_API HWY_RVV_T(BASE, SEW) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
1953 return v##OP##_s_##CHAR##SEW##LMUL##_##CHAR##SEW(v); \
1958 #undef HWY_RVV_GET_LANE
  return GetLane(detail::SlideDown(v, v, i));

template <class V, HWY_IF_NOT_LANE_SIZE_V(V, 1)>
  using TU = TFromD<decltype(du)>;
  const auto is_i = detail::EqS(detail::Iota0(du), static_cast<TU>(i));

template <class V, HWY_IF_LANE_SIZE_V(V, 1)>
  const auto zero = Zero(d);
  const auto one = Set(d, 1);
  const auto ge_i = Eq(detail::SlideUp(zero, one, i), one);
  const auto is_i = detail::SetOnlyFirst(ge_i);
  const auto is_even = detail::EqS(detail::AndS(detail::Iota0(du), 1), 0);

  const V up = detail::Slide1Up(v);

  const V down = detail::Slide1Down(v);

  const RebindToUnsigned<DFromV<V>> du;
  constexpr size_t kShift = CeilLog2(16 / sizeof(TFromV<V>));
  const auto idx_block = ShiftRight<kShift>(detail::Iota0(du));
  const auto is_even = detail::EqS(detail::AndS(idx_block, 1), 0);

  const V down = detail::SlideDown(v, v, lpb);
  const V up = detail::SlideUp(v, v, lpb);
template <class D, class VI>
  static_assert(sizeof(TFromD<D>) == sizeof(TFromV<VI>), "Index != lane");
  const auto indices = BitCast(du, vec);
#if HWY_IS_DEBUG_BUILD

template <class D, typename TI>
  static_assert(sizeof(TFromD<D>) == sizeof(TI), "Index size must match lane");
2056 #define HWY_RVV_TABLE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
2058 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
2059 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(uint, SEW, LMUL) idx) { \
2060 return v##OP##_vv_##CHAR##SEW##LMUL(v, idx, HWY_RVV_AVL(SEW, SHIFT)); \
2064 #undef HWY_RVV_TABLE
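// Added note: the table-lookup op above maps to the vrgather intrinsics
// (assuming OP=rgather); for u32/m1 the body expands to
// vrgather_vv_u32m1(v, idx, avl), which gathers v[idx[i]] into lane i, with
// out-of-range indices yielding zero per the RVV specification.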
template <class D, class V>
  const auto idx = detail::AddS(Add(iota, iota), 1);
  return detail::SlideUp(lo_odd, hi_odd, Lanes(d) / 2);

template <class D, class V>
  const auto idx = Add(iota, iota);
  return detail::SlideUp(lo_even, hi_even, Lanes(d) / 2);

  using TU = TFromD<decltype(du)>;
  const size_t N = Lanes(du);

  const Twice<decltype(d)> d2;
  const Twice<decltype(d2)> d4;
  const auto vx = detail::Ext(d4, detail::Ext(d2, v));
template <class D, HWY_IF_LANE_SIZE_D(D, 4), HWY_RVV_IF_POW2_IN(D, 0, 3)>
  const Twice<decltype(d)> d2;
  const Twice<decltype(d2)> d4;
  const auto vx = detail::Ext(d4, detail::Ext(d2, v));

template <class D, class V = VFromD<D>, HWY_IF_LANE_SIZE_D(D, 8)>
  const V up = detail::Slide1Up(v);
  const V down = detail::Slide1Down(v);
2149 const RebindToUnsigned<D> du;
2158 const RebindToUnsigned<D> du;
template <class D, class V = VFromD<D>>
  const Repartition<uint64_t, D> du64;
  const size_t N = Lanes(du64);
      detail::ReverseSubS(detail::Iota0(du64), static_cast<uint64_t>(N - 1));
2171 const auto idx = detail::XorS(rev, 1);
template <typename T>
struct CompressIsPartition {
2182 #define HWY_RVV_COMPRESS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
2183 SHIFT, MLEN, NAME, OP) \
2184 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
2185 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_M(MLEN) mask) { \
2186 return v##OP##_vm_##CHAR##SEW##LMUL(mask, v, v, HWY_RVV_AVL(SEW, SHIFT)); \
2191 #undef HWY_RVV_COMPRESS
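// Added note: Compress maps to vcompress, which packs the lanes selected by
// the mask toward the start; passing v as the maskedoff/dest operand means
// lanes past the packed ones keep their previous values. Illustrative
// expansion for f64/m1 (MLEN=64):
//   vcompress_vm_f64m1(mask, v, v, HWY_RVV_AVL(64, 0))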
template <class V, class M>

template <class V, class M>

template <class V, class M, class D>

template <class V, class M, class D>
  detail::StoreN(count, Compress(v, mask), d, unaligned);
template <size_t kBytes, class D, class V = VFromD<D>>
  const auto hi8 = BitCast(d8, hi);
  const auto lo8 = BitCast(d8, lo);
  const auto hi_up = detail::SlideUp(hi8, hi8, 16 - kBytes);
  const auto lo_down = detail::SlideDown(lo8, lo8, kBytes);

template <size_t kLanes, class D, class V = VFromD<D>>
  constexpr size_t kLanesUp = 16 / sizeof(TFromV<V>) - kLanes;
  const auto hi_up = detail::SlideUp(hi, hi, kLanesUp);
  const auto lo_down = detail::SlideDown(lo, lo, kLanes);
  const auto is_lo = detail::FirstNPerBlock<kLanesUp>(d);
  static_assert(sizeof(TFromD<decltype(d)>) == 4, "Defined for 32-bit types");
  return BitCast(d, Or(ShiftRight<32>(v64), ShiftLeft<32>(v64)));

  static_assert(sizeof(TFromD<decltype(d)>) == 4, "Defined for 32-bit types");
  return CombineShiftRightLanes<3>(d, v, v);

  static_assert(sizeof(TFromD<decltype(d)>) == 4, "Defined for 32-bit types");
  return CombineShiftRightLanes<1>(d, v, v);

  static_assert(sizeof(TFromD<decltype(d)>) == 4, "Defined for 32-bit types");
  return CombineShiftRightLanes<2>(d, v, v);

  static_assert(sizeof(TFromD<decltype(d)>) == 8, "Defined for 64-bit types");
  return CombineShiftRightLanes<1>(d, v, v);
template <typename T, size_t N, int kPow2>
  const Simd<T, N, kPow2 - 1> dh;
  const Simd<T, N, kPow2 - 2> dhh;
  return Ext(d, Ext(dh, Ext(dhh, v)));

template <typename T, size_t N, int kPow2>
  const Simd<T, N, kPow2 - 1> dh;
  return Ext(d, Ext(dh, v));
template <typename T, size_t N, int kPow2>

template <typename T, size_t N, int kPow2>

template <typename T, size_t N, int kPow2>

template <typename T, size_t N, int kPow2>

template <typename T, size_t N, int kPow2>
template <class VT, class VI>
  constexpr int kPow2T = Pow2(dt8);
  constexpr int kPow2I = Pow2(di8);
  if (kPow2T < kPow2I) {
    offsets = detail::AndS(offsets, Lanes(dt8) - 1);
template <class VT, class VI>
  const auto idx8 = BitCast(di8, idx);

template <int kLane, class V>
  idx = detail::AddS(idx, kLane);
template <size_t kLanes, class D, class V = VFromD<D>>
  using TI = TFromD<decltype(di)>;
  const auto shifted = detail::SlideUp(v, v, kLanes);
  const auto idx_mod =
  const auto clear = detail::LtS(BitCast(di, idx_mod),
                                 static_cast<TI>(kLanes));

template <size_t kLanes, class V>
  return ShiftLeftLanes<kLanes>(DFromV<V>(), v);
template <int kBytes, class D>

template <int kBytes, class V>
  return ShiftLeftBytes<kBytes>(DFromV<V>(), v);

template <size_t kLanes, typename T, size_t N, int kPow2,
          class V = VFromD<Simd<T, N, kPow2>>>
  using TI = TFromD<decltype(di)>;
  if (N <= 16 / sizeof(T)) {
  const auto shifted = detail::SlideDown(v, v, kLanes);
  const auto idx_mod = detail::AndS(detail::Iota0(di), lpb - 1);
      detail::LtS(BitCast(di, idx_mod), static_cast<TI>(lpb - kLanes));
template <int kBytes, class D, class V = VFromD<D>>

template <class D, class V>
  static_assert(IsSame<TFromD<D>, TFromV<V>>(), "D/V mismatch");
  const auto idx_mod =
  const auto is_even = detail::EqS(detail::AndS(i, 1), 0u);

template <class D, class V>
  const auto idx_mod = ShiftRight<1>(detail::AndS(i, lpb - 1));
  const auto idx = detail::AddS(idx_lower, lpb / 2);
  const auto is_even = detail::EqS(detail::AndS(i, 1), 0u);

template <class V, class DW = RepartitionToWide<DFromV<V>>>
  const RepartitionToNarrow<DW> dn;
  static_assert(IsSame<TFromD<decltype(dn)>, TFromV<V>>(), "D/V mismatch");

template <class V, class DW = RepartitionToWide<DFromV<V>>>

template <class DW, class V>
  const RepartitionToNarrow<DW> dn;
  static_assert(IsSame<TFromD<decltype(dn)>, TFromV<V>>(), "D/V mismatch");
2508 #define HWY_RVV_REDUCE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
2510 template <class D> \
2511 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
2512 NAME(D d, HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, m1) v0) { \
2513 return Set(d, GetLane(v##OP##_vs_##CHAR##SEW##LMUL##_##CHAR##SEW##m1( \
2514 v0, v, v0, Lanes(d)))); \
  return detail::RedSum(d, v, v0);

  const auto neutral = Set(d1, HighestValue<T>());
  return detail::RedMin(d, v, neutral);

  const auto neutral = Set(d1, LowestValue<T>());
  return detail::RedMax(d, v, neutral);
2560 #undef HWY_RVV_REDUCE
template <typename V, class D = DFromV<V>, HWY_IF_LANE_SIZE_D(D, 1),
          hwy::EnableIf<Pow2(D()) < 1 || MaxLanes(D()) < 16>* = nullptr>
HWY_API V PopulationCount(V v) {
  v = Sub(v, detail::AndS(ShiftRight<1>(v), 0x55));
  v = Add(detail::AndS(ShiftRight<2>(v), 0x33), detail::AndS(v, 0x33));
  return detail::AndS(Add(v, ShiftRight<4>(v)), 0x0F);
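// Worked example (added) for one byte, v = 0xB5 (five bits set):
//   v - ((v >> 1) & 0x55)           -> 0x65  (2-bit pair counts 1,2,1,1)
//   ((v >> 2) & 0x33) + (v & 0x33)  -> 0x32  (nibble counts 3 and 2)
//   (v + (v >> 4)) & 0x0F           -> 0x05  (total popcount)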
  const auto loaded = Load(d, p);
2600 #define HWY_RVV_LOAD_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP) \
2601 HWY_INLINE HWY_RVV_M(MLEN) \
2602 NAME(hwy::SizeTag<MLEN> , const uint8_t* bits, size_t N) { \
2603 return OP##_v_b##MLEN(bits, N); \
2606 #undef HWY_RVV_LOAD_MASK_BITS
template <class D, class MT = detail::MaskTag<D>>
2616 #define HWY_RVV_STORE_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP) \
2617 template <class D> \
2618 HWY_API size_t NAME(D d, HWY_RVV_M(MLEN) m, uint8_t* bits) { \
2619 const size_t N = Lanes(d); \
2620 OP##_v_b##MLEN(bits, m, N); \
2623 constexpr bool kLessThan8 = \
2624 detail::ScaleByPower(16 / sizeof(TFromD<D>), Pow2(d)) < 8; \
2625 if (MaxLanes(d) < 8 || (kLessThan8 && N < 8)) { \
2626 const int mask = (1 << N) - 1; \
2627 bits[0] = static_cast<uint8_t>(bits[0] & mask); \
2629 return (N + 7) / 8; \
2632 #undef HWY_RVV_STORE_MASK_BITS
template <class D, HWY_IF_NOT_LANE_SIZE_D(D, 1)>
  const RebindToSigned<D> di;
  using TI = TFromD<decltype(di)>;

template <class D, HWY_IF_LANE_SIZE_D(D, 1)>
  const auto zero = Zero(d);
  const auto one = Set(d, 1);
  return Eq(detail::SlideUp(one, zero, n), one);

template <class V, HWY_IF_SIGNED_V(V)>
  return detail::ReverseSubS(v, 0);
2673 #define HWY_RVV_RETV_ARGV2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
2674 SHIFT, MLEN, NAME, OP) \
2675 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
2676 return v##OP##_vv_##CHAR##SEW##LMUL(v, v, HWY_RVV_AVL(SEW, SHIFT)); \
template <class V, HWY_IF_SIGNED_V(V)>
2690 #undef HWY_RVV_RETV_ARGV2
2706 enum RoundingModes { kNear, kTrunc, kDown, kUp };
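// Added commentary: Round relies on the default round-to-nearest-even mode,
// while Ceil and Floor below temporarily switch the dynamic floating-point
// rounding mode via the fsrm instruction (kUp or kDown), call Round, and then
// restore kNear.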
  const auto int_f = ConvertTo(df, integer);

  const auto int_f = ConvertTo(df, integer);

  asm volatile("fsrm %0" ::"r"(detail::kUp));
  const auto ret = Round(v);
  asm volatile("fsrm %0" ::"r"(detail::kNear));

  asm volatile("fsrm %0" ::"r"(detail::kDown));
  const auto ret = Round(v);
  asm volatile("fsrm %0" ::"r"(detail::kNear));
template <class V, class D = DFromV<V>>
  return RebindMask(d, detail::EqS(Add(vi, vi), hwy::MaxExponentTimes2<T>()));

template <class V, class D = DFromV<V>>
  const VFromD<decltype(di)> exp =
  return RebindMask(d, detail::LtS(exp, hwy::MaxExponentField<T>()));
template <class D, HWY_IF_UNSIGNED_D(D)>

template <class D, HWY_IF_SIGNED_D(D)>
  const RebindToUnsigned<D> du;

template <class D, HWY_IF_FLOAT_D(D)>
  const RebindToUnsigned<D> du;
  const RebindToSigned<D> di;
template <class V, HWY_IF_LANE_SIZE_V(V, 4), class D = DFromV<V>,
          class DW = RepartitionToWide<D>>
  const auto lo = Mul(a, b);

template <class V, HWY_IF_LANE_SIZE_V(V, 8)>
  return OddEven(detail::Slide1Up(hi), lo);

template <class V, HWY_IF_LANE_SIZE_V(V, 8)>
  return OddEven(hi, detail::Slide1Down(lo));
template <size_t N, int kPow2>
  const VFromD<decltype(du32)> b_in_even = ShiftRight<16>(BitCast(du32, b));

template <size_t N, int kPow2>
                                   const VFromD<decltype(df32)> sum0,
                                   VFromD<decltype(df32)>& sum1)
    -> VFromD<decltype(df32)> {
  using VU32 = VFromD<decltype(du32)>;
  const VFromD<decltype(du16)> zero = Zero(du16);
2893 const VFromD<D> ltLx = detail::Slide1Up(ltHL);
2912 const VFromD<D> aXH = detail::Slide1Down(a);
2913 const VFromD<D> bXH = detail::Slide1Down(b);
  const VFromD<D> minHL = Min(a, b);
  const MFromD<D> ltXH = Lt(aXH, bXH);
  const MFromD<D> eqXH = Eq(aXH, bXH);

  const VFromD<D> aXH = detail::Slide1Down(a);
  const VFromD<D> bXH = detail::Slide1Down(b);
  const VFromD<D> maxHL = Max(a, b);
  const MFromD<D> ltXH = Lt(aXH, bXH);
  const MFromD<D> eqXH = Eq(aXH, bXH);
2952 #undef HWY_RVV_FOREACH
2953 #undef HWY_RVV_FOREACH_08_ALL
2954 #undef HWY_RVV_FOREACH_08_ALL_VIRT
2955 #undef HWY_RVV_FOREACH_08_DEMOTE
2956 #undef HWY_RVV_FOREACH_08_DEMOTE_VIRT
2957 #undef HWY_RVV_FOREACH_08_EXT
2958 #undef HWY_RVV_FOREACH_08_EXT_VIRT
2959 #undef HWY_RVV_FOREACH_08_TRUNC
2960 #undef HWY_RVV_FOREACH_08_VIRT
2961 #undef HWY_RVV_FOREACH_16_ALL
2962 #undef HWY_RVV_FOREACH_16_ALL_VIRT
2963 #undef HWY_RVV_FOREACH_16_DEMOTE
2964 #undef HWY_RVV_FOREACH_16_DEMOTE_VIRT
2965 #undef HWY_RVV_FOREACH_16_EXT
2966 #undef HWY_RVV_FOREACH_16_EXT_VIRT
2967 #undef HWY_RVV_FOREACH_16_TRUNC
2968 #undef HWY_RVV_FOREACH_16_VIRT
2969 #undef HWY_RVV_FOREACH_32_ALL
2970 #undef HWY_RVV_FOREACH_32_ALL_VIRT
2971 #undef HWY_RVV_FOREACH_32_DEMOTE
2972 #undef HWY_RVV_FOREACH_32_DEMOTE_VIRT
2973 #undef HWY_RVV_FOREACH_32_EXT
2974 #undef HWY_RVV_FOREACH_32_EXT_VIRT
2975 #undef HWY_RVV_FOREACH_32_TRUNC
2976 #undef HWY_RVV_FOREACH_32_VIRT
2977 #undef HWY_RVV_FOREACH_64_ALL
2978 #undef HWY_RVV_FOREACH_64_ALL_VIRT
2979 #undef HWY_RVV_FOREACH_64_DEMOTE
2980 #undef HWY_RVV_FOREACH_64_DEMOTE_VIRT
2981 #undef HWY_RVV_FOREACH_64_EXT
2982 #undef HWY_RVV_FOREACH_64_EXT_VIRT
2983 #undef HWY_RVV_FOREACH_64_TRUNC
2984 #undef HWY_RVV_FOREACH_64_VIRT
2985 #undef HWY_RVV_FOREACH_B
2986 #undef HWY_RVV_FOREACH_F
2987 #undef HWY_RVV_FOREACH_F16
2988 #undef HWY_RVV_FOREACH_F32
2989 #undef HWY_RVV_FOREACH_F3264
2990 #undef HWY_RVV_FOREACH_F64
2991 #undef HWY_RVV_FOREACH_I
2992 #undef HWY_RVV_FOREACH_I08
2993 #undef HWY_RVV_FOREACH_I16
2994 #undef HWY_RVV_FOREACH_I163264
2995 #undef HWY_RVV_FOREACH_I32
2996 #undef HWY_RVV_FOREACH_I64
2997 #undef HWY_RVV_FOREACH_U
2998 #undef HWY_RVV_FOREACH_U08
2999 #undef HWY_RVV_FOREACH_U16
3000 #undef HWY_RVV_FOREACH_U163264
3001 #undef HWY_RVV_FOREACH_U32
3002 #undef HWY_RVV_FOREACH_U64
3003 #undef HWY_RVV_FOREACH_UI
3004 #undef HWY_RVV_FOREACH_UI08
3005 #undef HWY_RVV_FOREACH_UI16
3006 #undef HWY_RVV_FOREACH_UI163264
3007 #undef HWY_RVV_FOREACH_UI32
3008 #undef HWY_RVV_FOREACH_UI3264
3009 #undef HWY_RVV_FOREACH_UI64
3011 #undef HWY_RVV_RETM_ARGM
3012 #undef HWY_RVV_RETV_ARGV
3013 #undef HWY_RVV_RETV_ARGVS
3014 #undef HWY_RVV_RETV_ARGVV
#define HWY_MAX(a, b)
Definition: base.h:126
#define HWY_RESTRICT
Definition: base.h:61
#define HWY_API
Definition: base.h:120
#define HWY_MIN(a, b)
Definition: base.h:125
#define HWY_INLINE
Definition: base.h:62
#define HWY_DASSERT(condition)
Definition: base.h:191
HWY_INLINE VFromD< DU > BitCastToUnsigned(V v)
Definition: rvv-inl.h:691
HWY_INLINE Mask128< float, N > UseInt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:3345
HWY_INLINE MFromD< D > FirstNPerBlock(D)
Definition: rvv-inl.h:1823
HWY_INLINE V OffsetsOf128BitBlocks(const D d, const V iota0)
Definition: rvv-inl.h:1817
HWY_INLINE Mask128< T, N > MaskFromVec(hwy::SizeTag< 1 >, const Vec128< T, N > v)
Definition: x86_128-inl.h:1520
constexpr size_t LanesPerBlock(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:1937
HWY_INLINE auto ChangeLMUL(Simd< T, N, kPow2 > d, VFromD< Simd< T, N, kPow2 - 3 >> v) -> VFromD< decltype(d)>
Definition: rvv-inl.h:2300
HWY_INLINE VFromD< DU > Iota0(const D)
Definition: rvv-inl.h:712
constexpr size_t ScaleByPower(size_t N, int pow2)
Definition: ops/shared-inl.h:111
constexpr bool IsSupportedLMUL(D d)
Definition: rvv-inl.h:1897
HWY_INLINE Vec128< uint8_t, N > BitCastFromByte(Simd< uint8_t, N, 0 >, Vec128< uint8_t, N > v)
Definition: arm_neon-inl.h:879
constexpr bool IsFull(Simd< T, N, kPow2 >)
Definition: ops/shared-inl.h:103
HWY_INLINE Vec128< uint8_t, N > BitCastToByte(Vec128< uint8_t, N > v)
Definition: arm_neon-inl.h:852
HWY_INLINE Mask128< T, N > LoadMaskBits(Simd< T, N, 0 > d, uint64_t mask_bits)
Definition: arm_neon-inl.h:4962
d
Definition: rvv-inl.h:1742
HWY_API Vec128< T, N > CopySign(const Vec128< T, N > magn, const Vec128< T, N > sign)
Definition: arm_neon-inl.h:2149
HWY_API Vec128< T, N > OddEvenBlocks(Vec128< T, N >, Vec128< T, N > even)
Definition: arm_neon-inl.h:4533
HWY_API Vec128< T, N > ShiftLeft(Vec128< T, N > v)
Definition: emu128-inl.h:392
HWY_API Vec128< T, N > AverageRound(Vec128< T, N > a, const Vec128< T, N > b)
Definition: emu128-inl.h:616
decltype(MaskFromVec(Zero(D()))) MFromD
Definition: rvv-inl.h:1155
HWY_API Vec128< uint8_t > Combine(Full128< uint8_t >, Vec64< uint8_t > hi, Vec64< uint8_t > lo)
Definition: arm_neon-inl.h:4224
HWY_API Mask128< TTo, N > RebindMask(Simd< TTo, N, 0 > dto, Mask128< TFrom, N > m)
Definition: arm_neon-inl.h:2189
HWY_API Vec128< T, N > DupOdd(Vec128< T, N > v)
Definition: arm_neon-inl.h:4498
HWY_API Vec128< float > ApproximateReciprocal(const Vec128< float > v)
Definition: arm_neon-inl.h:1719
HWY_API VFromD< DW > ZipLower(V a, V b)
Definition: arm_neon-inl.h:4187
HWY_API bool AllTrue(const Full128< T > d, const Mask128< T > m)
Definition: arm_neon-inl.h:5305
HWY_API void LoadInterleaved2(Simd< T, N, 0 >, const T *HWY_RESTRICT unaligned, Vec128< T, N > &v0, Vec128< T, N > &v1)
Definition: arm_neon-inl.h:5938
HWY_API Vec128< T > Shuffle1032(const Vec128< T > v)
Definition: arm_neon-inl.h:4046
HWY_API void StoreInterleaved4(const Vec128< T, N > v0, const Vec128< T, N > v1, const Vec128< T, N > v2, const Vec128< T, N > v3, Simd< T, N, 0 >, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:6173
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6309
HWY_API Vec128< uint64_t > InterleaveLower(const Vec128< uint64_t > a, const Vec128< uint64_t > b)
Definition: arm_neon-inl.h:4096
HWY_API Vec128< T > Shuffle2103(const Vec128< T > v)
Definition: arm_neon-inl.h:4062
HWY_API Vec128< float, N > Round(const Vec128< float, N > v)
Definition: arm_neon-inl.h:3363
HWY_API Vec128< T, N > ZeroExtendVector(Simd< T, N, 0 > d, Vec128< T, N/2 > lo)
Definition: arm_neon-inl.h:4284
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6301
HWY_API Mask128< T, N > IsNaN(const Vec128< T, N > v)
Definition: arm_neon-inl.h:3433
HWY_API intptr_t FindFirstTrue(const Simd< T, N, 0 > d, const Mask128< T, N > mask)
Definition: arm_neon-inl.h:5280
RepartitionToNarrow< RebindToUnsigned< DF > > DU16FromDF
Definition: rvv-inl.h:2852
HWY_API V128 CombineShiftRightBytes(Full128< T > d, V128 hi, V128 lo)
Definition: arm_neon-inl.h:3514
HWY_API auto Gt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6314
HWY_API Vec128< T, N > ShiftLeftLanes(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:3617
HWY_API Mask128< T, N > FirstN(const Simd< T, N, 0 > d, size_t num)
Definition: arm_neon-inl.h:2409
HWY_API size_t StoreMaskBits(Simd< T, N, 0 >, const Mask128< T, N > mask, uint8_t *bits)
Definition: arm_neon-inl.h:5290
HWY_API Vec128< float, N > MulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1784
constexpr HWY_API size_t Lanes(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:236
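The MulAdd and Lanes entries above, together with Load, Store and ScalableTag listed further down, are the core of a typical scalable loop. A minimal sketch, not taken from this header; it assumes size is a multiple of Lanes(d), and myproject/MulAddLoop are hypothetical names:

#include "hwy/highway.h"

HWY_BEFORE_NAMESPACE();
namespace myproject {
namespace HWY_NAMESPACE {
namespace hn = hwy::HWY_NAMESPACE;

// out[i] += a[i] * b[i], processed Lanes(d) elements per iteration.
void MulAddLoop(const float* HWY_RESTRICT a, const float* HWY_RESTRICT b,
                size_t size, float* HWY_RESTRICT out) {
  const hn::ScalableTag<float> d;
  for (size_t i = 0; i < size; i += hn::Lanes(d)) {
    const auto mul = hn::Load(d, a + i);
    const auto x = hn::Load(d, b + i);
    const auto add = hn::Load(d, out + i);
    hn::Store(hn::MulAdd(mul, x, add), d, out + i);
  }
}

}  // namespace HWY_NAMESPACE
}  // namespace myproject
HWY_AFTER_NAMESPACE();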
HWY_API void Stream(const Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2901
HWY_API Vec128< int8_t > Abs(const Vec128< int8_t > v)
Definition: arm_neon-inl.h:2105
Repartition< MakeWide< TFromD< D > >, D > RepartitionToWide
Definition: ops/shared-inl.h:209
HWY_API Vec128< T, N > SumOfLanes(Simd< T, N, 0 >, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4932
HWY_API Vec128< T, N > BroadcastSignBit(const Vec128< T, N > v)
Definition: arm_neon-inl.h:2166
V Shl(V a, V b)
Definition: arm_neon-inl.h:6292
HWY_RVV_FOREACH_F64(HWY_RVV_DEMOTE_F, DemoteTo, vfncvt_rod_f_f_w_f, _DEMOTE_VIRT) template< size_t N > HWY_API vint32mf2_t DemoteTo(Simd< int32_t
HWY_API auto Ge(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6318
HWY_API Vec128< float > AbsDiff(const Vec128< float > a, const Vec128< float > b)
Definition: arm_neon-inl.h:1758
HWY_API Vec128< uint64_t, N > Min(const Vec128< uint64_t, N > a, const Vec128< uint64_t, N > b)
Definition: arm_neon-inl.h:2470
HWY_API Vec128< uint64_t, N > Max(const Vec128< uint64_t, N > a, const Vec128< uint64_t, N > b)
Definition: arm_neon-inl.h:2508
HWY_API Mask128< T, N > MaskFromVec(const Vec128< T, N > v)
Definition: arm_neon-inl.h:2176
HWY_API Vec128< T, N > ConcatUpperUpper(const Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4353
Rebind< MakeUnsigned< TFromD< D > >, D > RebindToUnsigned
Definition: ops/shared-inl.h:200
HWY_API Vec128< T, N > GatherIndex(const Simd< T, N, 0 > d, const T *HWY_RESTRICT base, const Vec128< Index, N > index)
Definition: arm_neon-inl.h:4779
HWY_INLINE constexpr HWY_MAYBE_UNUSED int Pow2(D)
Definition: ops/shared-inl.h:252
HWY_INLINE Vec128< uint64_t > MulOdd(Vec128< uint64_t > a, Vec128< uint64_t > b)
Definition: arm_neon-inl.h:4654
HWY_API Vec128< T, N > ConcatEven(Simd< T, N, 0 >, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4453
HWY_API Vec128< T > Shuffle0321(const Vec128< T > v)
Definition: arm_neon-inl.h:4056
HWY_API Mask128< T, N > IsInf(const Vec128< T, N > v)
Definition: arm_neon-inl.h:3438
HWY_API Vec128< T, N > ConcatLowerUpper(const Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4380
HWY_API Vec128< int64_t > Neg(const Vec128< int64_t > v)
Definition: arm_neon-inl.h:1398
HWY_API Vec128< int64_t > MulEven(Vec128< int32_t > a, Vec128< int32_t > b)
Definition: arm_neon-inl.h:4614
HWY_API Vec128< bfloat16_t, 2 *N > ReorderDemote2To(Simd< bfloat16_t, 2 *N, 0 > dbf16, Vec128< float, N > a, Vec128< float, N > b)
Definition: arm_neon-inl.h:4555
HWY_API Vec128< T, 1 > CompressNot(Vec128< T, 1 > v, Mask128< T, 1 >)
Definition: arm_neon-inl.h:5787
HWY_API Vec128< T, N > MaskedLoad(Mask128< T, N > m, Simd< T, N, 0 > d, const T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2711
HWY_API Vec128< T, N/2 > LowerHalf(const Vec128< T, N > v)
Definition: arm_neon-inl.h:3467
typename D::Twice Twice
Definition: ops/shared-inl.h:219
Rebind< MakeSigned< TFromD< D > >, D > RebindToSigned
Definition: ops/shared-inl.h:198
HWY_API Vec128< uint64_t > CompressBlocksNot(Vec128< uint64_t > v, Mask128< uint64_t >)
Definition: arm_neon-inl.h:5815
HWY_API Vec32< uint8_t > U8FromU32(const Vec128< uint32_t > v)
Definition: arm_neon-inl.h:3233
HWY_API Vec128< float, N > ReorderWidenMulAccumulate(Simd< float, N, 0 > df32, Vec128< bfloat16_t, 2 *N > a, Vec128< bfloat16_t, 2 *N > b, const Vec128< float, N > sum0, Vec128< float, N > &sum1)
Definition: arm_neon-inl.h:4203
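Following the ReorderWidenMulAccumulate signature above, a hedged sketch of a bfloat16 dot product; pa, pb and size are assumed inputs, size is assumed to be a multiple of Lanes(dbf16), and the same include/HWY_NAMESPACE scaffolding as the MulAddLoop sketch earlier is required:

const hn::ScalableTag<float> df32;
const hn::Repartition<hwy::bfloat16_t, decltype(df32)> dbf16;
auto sum0 = hn::Zero(df32);
auto sum1 = hn::Zero(df32);  // second accumulator, updated by reference
for (size_t i = 0; i < size; i += hn::Lanes(dbf16)) {
  const auto a = hn::Load(dbf16, pa + i);
  const auto b = hn::Load(dbf16, pb + i);
  sum0 = hn::ReorderWidenMulAccumulate(df32, a, b, sum0, sum1);
}
// Lane order of the partial sums is unspecified; reduce both before reading.
const float dot = hn::GetLane(hn::SumOfLanes(df32, hn::Add(sum0, sum1)));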
HWY_API Vec128< T, N > SaturatedAdd(Vec128< T, N > a, const Vec128< T, N > b)
Definition: emu128-inl.h:594
HWY_API Vec128< T, N > IfVecThenElse(Vec128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition: arm_neon-inl.h:2006
HWY_API void BlendedStore(Vec128< T, N > v, Mask128< T, N > m, Simd< T, N, 0 > d, T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2887
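MaskedLoad and BlendedStore pair with FirstN (listed earlier) to handle a partial final iteration without a scalar fallback. A small sketch; in, out, i and remaining are assumed from the surrounding loop, with remaining <= Lanes(d):

const hn::ScalableTag<float> d;
const auto m = hn::FirstN(d, remaining);      // true for the first `remaining` lanes
const auto v = hn::MaskedLoad(m, d, in + i);  // unselected lanes are zero
hn::BlendedStore(v, m, d, out + i);           // only selected lanes are written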
HWY_API size_t CountTrue(Full128< T >, const Mask128< T > mask)
Definition: arm_neon-inl.h:5269
HWY_API Vec128< T, N > VecFromMask(Simd< T, N, 0 > d, const Mask128< T, N > v)
Definition: arm_neon-inl.h:2182
HWY_API Vec128< float > ConvertTo(Full128< float >, const Vec128< int32_t > v)
Definition: arm_neon-inl.h:3273
HWY_API Vec128< T, N > DupEven(Vec128< T, N > v)
Definition: arm_neon-inl.h:4482
HWY_API Vec128< T, N > IfThenElseZero(const Mask128< T, N > mask, const Vec128< T, N > yes)
Definition: arm_neon-inl.h:2212
HWY_API V Add(V a, V b)
Definition: arm_neon-inl.h:6274
HWY_API Vec128< T, N > ShiftRight(Vec128< T, N > v)
Definition: emu128-inl.h:402
HWY_API Vec128< T, N > Load(Simd< T, N, 0 > d, const T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2706
HWY_API Vec128< T, N > IfThenElse(const Mask128< T, N > mask, const Vec128< T, N > yes, const Vec128< T, N > no)
Definition: emu128-inl.h:325
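IfThenElse combines with the comparisons (Lt, Gt, Eq, ...) for branch-free selection. For example, clamping negative lanes to zero, which matches ZeroIfNegative; d and v are assumed from context:

const auto zero = hn::Zero(d);
const auto clamped = hn::IfThenElse(hn::Lt(v, zero), zero, v);
// Equivalent, using the mask-zero variant:
const auto clamped2 = hn::IfThenZeroElse(hn::Lt(v, zero), v);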
HWY_API Vec128< T, N > TableLookupLanes(Vec128< T, N > v, Indices128< T, N > idx)
Definition: arm_neon-inl.h:3934
HWY_API Vec128< T, N > Reverse4(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4005
HWY_API Vec128< float, N > Floor(const Vec128< float, N > v)
Definition: arm_neon-inl.h:3394
HWY_API Vec128< float, N > MulSub(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > sub)
Definition: arm_neon-inl.h:1838
HWY_API Vec128< T, N > CopySignToAbs(const Vec128< T, N > abs, const Vec128< T, N > sign)
Definition: arm_neon-inl.h:2157
HWY_API void StoreU(const Vec128< uint8_t > v, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2725
HWY_INLINE VFromD< D > Min128Upper(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:6260
constexpr size_t MLenFromD(Simd< T, N, kPow2 >)
Definition: rvv-inl.h:43
HWY_API Vec128< T, N > ConcatOdd(Simd< T, N, 0 >, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4422
HWY_API Vec128< float, N > Ceil(const Vec128< float, N > v)
Definition: arm_neon-inl.h:3380
Repartition< MakeNarrow< TFromD< D > >, D > RepartitionToNarrow
Definition: ops/shared-inl.h:211
HWY_API Indices128< T, N > IndicesFromVec(Simd< T, N, 0 > d, Vec128< TI, N > vec)
Definition: arm_neon-inl.h:3888
HWY_API Vec128< T, N > SwapAdjacentBlocks(Vec128< T, N > v)
Definition: arm_neon-inl.h:4540
HWY_API Vec128< T, N > ShiftLeftBytes(Simd< T, N, 0 >, Vec128< T, N > v)
Definition: arm_neon-inl.h:3606
HWY_INLINE VFromD< D > Min128(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:6250
HWY_API Vec128< T, N > Reverse2(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:3976
svuint16_t Set(Simd< bfloat16_t, N, kPow2 > d, bfloat16_t arg)
Definition: arm_sve-inl.h:312
HWY_API Vec128< T, N > Reverse8(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4028
HWY_API Vec128< T, N > MaxOfLanes(Simd< T, N, 0 >, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4940
Vec128< T, N > Iota(const Simd< T, N, 0 > d, const T2 first)
Definition: arm_neon-inl.h:1035
HWY_API Mask128< T, N > LoadMaskBits(Simd< T, N, 0 > d, const uint8_t *HWY_RESTRICT bits)
Definition: arm_neon-inl.h:5005
HWY_API Vec128< T, N > ZeroIfNegative(Vec128< T, N > v)
Definition: arm_neon-inl.h:2236
HWY_API Vec128< T > Shuffle01(const Vec128< T > v)
Definition: arm_neon-inl.h:4050
HWY_API Mask128< uint64_t, N > TestBit(Vec128< uint64_t, N > v, Vec128< uint64_t, N > bit)
Definition: arm_neon-inl.h:2430
HWY_API Vec128< uint8_t > LoadU(Full128< uint8_t >, const uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2544
HWY_API Vec128< int16_t > MulHigh(const Vec128< int16_t > a, const Vec128< int16_t > b)
Definition: arm_neon-inl.h:1669
HWY_INLINE VFromD< D > Max128Upper(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:6265
HWY_INLINE Mask128< T, N > Lt128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:6212
HWY_API Vec128< T, N > OrAnd(Vec128< T, N > o, Vec128< T, N > a1, Vec128< T, N > a2)
Definition: arm_neon-inl.h:1999
HWY_API Vec128< T, N > IfNegativeThenElse(Vec128< T, N > v, Vec128< T, N > yes, Vec128< T, N > no)
Definition: arm_neon-inl.h:2225
HWY_API Vec128< T, N > ConcatUpperLower(Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4406
HWY_API Vec128< T, N > And(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1934
typename detail::ScalableTagChecker< T, kPow2 >::type ScalableTag
Definition: ops/shared-inl.h:161
HWY_API Vec128< T, N > BitCast(Simd< T, N, 0 > d, Vec128< FromT, N *sizeof(T)/sizeof(FromT)> v)
Definition: arm_neon-inl.h:988
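BitCast reinterprets lane bits without conversion, usually through RebindToUnsigned; a hedged sketch that isolates the sign bits of a float vector (vf is an assumed float vector; same scaffolding as above):

const hn::ScalableTag<float> df;
const hn::RebindToUnsigned<decltype(df)> du;  // uint32_t lanes, same count
const auto bits = hn::BitCast(du, vf);        // reinterpretation only
const auto sign = hn::And(bits, hn::Set(du, 0x80000000u));
const auto sign_f = hn::BitCast(df, sign);    // back to float lanes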
HWY_API bool AllFalse(const Simd< T, N, 0 > d, const Mask128< T, N > m)
Definition: arm_neon-inl.h:5299
HWY_API T ExtractLane(const Vec128< T, 1 > v, size_t i)
Definition: arm_neon-inl.h:1070
HWY_API Vec128< int16_t > MulFixedPoint15(Vec128< int16_t > a, Vec128< int16_t > b)
Definition: arm_neon-inl.h:1705
HWY_API void ScatterOffset(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT base, const Vec128< Offset, N > offset)
Definition: arm_neon-inl.h:4726
HWY_API Vec128< T, N > Undefined(Simd< T, N, 0 >)
Definition: arm_neon-inl.h:1025
HWY_API VFromD< DW > ZipUpper(DW dw, V a, V b)
Definition: arm_neon-inl.h:4196
HWY_API Vec128< T, N > ConcatLowerLower(const Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4292
HWY_API V Sub(V a, V b)
Definition: arm_neon-inl.h:6278
HWY_API Vec128< T, N > SaturatedSub(Vec128< T, N > a, const Vec128< T, N > b)
Definition: emu128-inl.h:605
typename D::template Rebind< T > Rebind
Definition: ops/shared-inl.h:195
HWY_INLINE constexpr HWY_MAYBE_UNUSED size_t MaxLanes(D)
Definition: ops/shared-inl.h:276
HWY_API Vec128< T, N > Zero(Simd< T, N, 0 > d)
Definition: arm_neon-inl.h:1011
HWY_API size_t CompressBitsStore(Vec128< T, N > v, const uint8_t *HWY_RESTRICT bits, Simd< T, N, 0 > d, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:5862
HWY_API V InterleaveUpper(Simd< T, N, 0 >, V a, V b)
Definition: arm_neon-inl.h:4171
HWY_API Vec128< T, N > GatherOffset(const Simd< T, N, 0 > d, const T *HWY_RESTRICT base, const Vec128< Offset, N > offset)
Definition: arm_neon-inl.h:4762
HWY_API size_t CompressBlendedStore(Vec128< T, N > v, Mask128< T, N > m, Simd< T, N, 0 > d, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:5846
HWY_API void LoadInterleaved3(Simd< T, N, 0 >, const T *HWY_RESTRICT unaligned, Vec128< T, N > &v0, Vec128< T, N > &v1, Vec128< T, N > &v2)
Definition: arm_neon-inl.h:5976
HWY_API Vec128< T, N > IfThenZeroElse(const Mask128< T, N > mask, const Vec128< T, N > no)
Definition: arm_neon-inl.h:2219
HWY_API Vec128< T, N > Xor(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1983
HWY_INLINE VFromD< D > Max128(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:6255
HWY_API auto Le(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6323
decltype(detail::DeduceD()(V())) DFromV
Definition: arm_neon-inl.h:833
HWY_API Vec128< int32_t, N > NearestInt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:3424
HWY_API Indices128< T, N > SetTableIndices(Simd< T, N, 0 > d, const TI *idx)
Definition: arm_neon-inl.h:3928
HWY_API TFromV< V > GetLane(const V v)
Definition: arm_neon-inl.h:1061
HWY_API void ScatterIndex(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT base, const Vec128< Index, N > index)
Definition: arm_neon-inl.h:4744
HWY_API Vec128< T > Not(const Vec128< T > v)
Definition: arm_neon-inl.h:1916
HWY_API Vec128< float, N > NegMulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1817
HWY_API Vec64< uint32_t > Shuffle2301(const Vec64< uint32_t > v)
Definition: arm_neon-inl.h:2279
HWY_API Vec128< T, N > Or3(Vec128< T, N > o1, Vec128< T, N > o2, Vec128< T, N > o3)
Definition: arm_neon-inl.h:1992
V Shr(V a, V b)
Definition: arm_neon-inl.h:6296
decltype(Zero(D())) VFromD
Definition: arm_neon-inl.h:1021
HWY_API Vec128< T, N > LoadDup128(Simd< T, N, 0 > d, const T *const HWY_RESTRICT p)
Definition: arm_neon-inl.h:2718
HWY_API Vec128< T, N > OddEven(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:4514
HWY_API Vec128< uint16_t > Broadcast(const Vec128< uint16_t > v)
Definition: arm_neon-inl.h:3800
HWY_INLINE Vec128< T, N > CompressBits(Vec128< T, N > v, const uint8_t *HWY_RESTRICT bits)
Definition: arm_neon-inl.h:5823
HWY_API Vec128< T > Shuffle0123(const Vec128< T > v)
Definition: arm_neon-inl.h:4068
HWY_API Vec128< float, N > Trunc(const Vec128< float, N > v)
Definition: arm_neon-inl.h:3352
typename D::Half Half
Definition: ops/shared-inl.h:215
HWY_API Vec128< T, N > MinOfLanes(Simd< T, N, 0 >, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4936
HWY_API Vec128< T, N > ShiftRightBytes(Simd< T, N, 0 >, Vec128< T, N > v)
Definition: arm_neon-inl.h:3629
typename D::template Repartition< T > Repartition
Definition: ops/shared-inl.h:206
HWY_API auto Ne(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6305
HWY_API Vec128< float, N > Sqrt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:1898
HWY_API Vec64< uint16_t > DemoteTo(Full64< uint16_t >, const Vec128< int32_t > v)
Definition: arm_neon-inl.h:3091
HWY_API Vec128< TI > TableLookupBytes(const Vec128< T > bytes, const Vec128< TI > from)
Definition: arm_neon-inl.h:4664
HWY_API size_t CompressStore(Vec128< T, N > v, const Mask128< T, N > mask, Simd< T, N, 0 > d, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:5837
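CompressStore packs the lanes selected by a mask to the front and returns how many were written, which is the usual building block for stream filtering. A sketch; in, out and size are assumptions, size is assumed to be a multiple of Lanes(d), and out needs slack for one extra full vector because CompressStore may write up to Lanes(d) elements per call:

const hn::ScalableTag<int32_t> d;
size_t written = 0;
for (size_t i = 0; i < size; i += hn::Lanes(d)) {
  const auto v = hn::LoadU(d, in + i);
  const auto keep = hn::Gt(v, hn::Zero(d));  // keep strictly positive lanes
  written += hn::CompressStore(v, keep, d, out + written);
}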
HWY_API Vec128< uint32_t, N > RotateRight(const Vec128< uint32_t, N > v)
Definition: arm_neon-inl.h:1429
HWY_API Mask128< T, N > IsFinite(const Vec128< T, N > v)
Definition: arm_neon-inl.h:3448
HWY_API Vec128< T, N > AndNot(const Vec128< T, N > not_mask, const Vec128< T, N > mask)
Definition: arm_neon-inl.h:1949
HWY_API void LoadInterleaved4(Simd< T, N, 0 >, const T *HWY_RESTRICT unaligned, Vec128< T, N > &v0, Vec128< T, N > &v1, Vec128< T, N > &v2, Vec128< T, N > &v3)
Definition: arm_neon-inl.h:6017
HWY_API Vec128< T > ReverseBlocks(Full128< T >, const Vec128< T > v)
Definition: arm_neon-inl.h:4548
HWY_API V Div(V a, V b)
Definition: arm_neon-inl.h:6287
HWY_API Vec128< uint64_t > SumsOf8(const Vec128< uint8_t > v)
Definition: arm_neon-inl.h:1346
HWY_API void StoreInterleaved2(const Vec128< T, N > v0, const Vec128< T, N > v1, Simd< T, N, 0 >, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:6106
HWY_API V Mul(V a, V b)
Definition: arm_neon-inl.h:6283
HWY_API Vec128< T, 1 > Reverse(Simd< T, 1, 0 >, const Vec128< T, 1 > v)
Definition: arm_neon-inl.h:3945
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2882
HWY_API Vec128< T, 1 > InsertLane(const Vec128< T, 1 > v, size_t i, T t)
Definition: arm_neon-inl.h:1210
HWY_INLINE Mask128< T, N > Lt128Upper(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:6240
HWY_API Vec128< uint16_t > PromoteTo(Full128< uint16_t >, const Vec64< uint8_t > v)
Definition: arm_neon-inl.h:2911
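PromoteTo and DemoteTo move between lane widths via a Rebind of the descriptor; for example, widening int16 lanes to int32 for headroom and narrowing back with saturation (v16 is an assumed int16 vector; same scaffolding as above):

const hn::ScalableTag<int16_t> d16;
const hn::Rebind<int32_t, decltype(d16)> d32;      // same lane count, 32-bit lanes
const auto wide = hn::PromoteTo(d32, v16);         // sign-extends each lane
const auto doubled = hn::Add(wide, wide);
const auto narrowed = hn::DemoteTo(d16, doubled);  // saturating narrow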
TFromD< DFromV< V > > TFromV
Definition: arm_neon-inl.h:836
HWY_API Vec64< uint8_t > UpperHalf(Full64< uint8_t >, const Vec128< uint8_t > v)
Definition: arm_neon-inl.h:3661
HWY_API Vec128< float > ApproximateReciprocalSqrt(const Vec128< float > v)
Definition: arm_neon-inl.h:1870
HWY_API V Trunc(const V v)
Definition: rvv-inl.h:2727
HWY_API Vec128< T, N > ShiftRightLanes(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:3635
HWY_API V CombineShiftRightLanes(const D d, const V hi, V lo)
Definition: rvv-inl.h:2238
HWY_API void StoreInterleaved3(const Vec128< T, N > v0, const Vec128< T, N > v1, const Vec128< T, N > v2, Simd< T, N, 0 >, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:6138
typename D::T TFromD
Definition: ops/shared-inl.h:191
HWY_API VI TableLookupBytesOr0(const V bytes, const VI from)
Definition: arm_neon-inl.h:4719
HWY_API Vec128< T, N > Or(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1971
HWY_API Vec128< T, 1 > Compress(Vec128< T, 1 > v, Mask128< T, 1 >)
Definition: arm_neon-inl.h:5763
HWY_API Vec128< float, N > NegMulSub(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > sub)
Definition: arm_neon-inl.h:1846
constexpr T MantissaEnd()
Definition: base.h:631
typename EnableIfT< Condition >::type EnableIf
Definition: base.h:309
constexpr HWY_API bool IsSame()
Definition: base.h:322
constexpr size_t CeilLog2(TI x)
Definition: base.h:777
constexpr HWY_API bool IsSigned()
Definition: base.h:534
typename detail::Relations< T >::Unsigned MakeUnsigned
Definition: base.h:503
#define HWY_IF_LANE_SIZE_D(D, bytes)
Definition: ops/shared-inl.h:235
#define HWY_RVV_FOREACH_I163264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:344
#define HWY_RVV_GATHER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1399
#define HWY_RVV_PROMOTE_X2(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)
Definition: rvv-inl.h:1561
#define HWY_RVV_CAST_U8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:567
#define HWY_RVV_RETM_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1020
#define HWY_RVV_FOREACH_U163264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:339
#define HWY_RVV_UNDEFINED(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:508
#define HWY_RVV_MASKED_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1302
#define HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:299
#define HWY_RVV_SCATTER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1370
#define HWY_RVV_FOREACH_F3264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:353
#define HWY_RVV_LOAD2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1435
#define HWY_RVV_CAST_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:610
#define HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:293
#define HWY_RVV_FOREACH_F(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:370
#define HWY_RVV_BLENDED_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1328
#define HWY_RVV_PROMOTE_X4(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)
Definition: rvv-inl.h:1568
#define HWY_RVV_REDUCE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2508
#define HWY_RVV_CAST_VIRT_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:626
#define HWY_RVV_SLIDE1(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1935
#define HWY_RVV_NEAREST(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1790
#define HWY_RVV_STOREN(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1341
#define HWY_RVV_EXT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:539
#define HWY_RVV_FOREACH_UI16(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:323
#define HWY_RVV_SHIFT_II(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:912
#define HWY_RVV_FMA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:992
#define HWY_RVV_DEMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1619
#define HWY_RVV_LANES(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:409
#define HWY_RVV_PROMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1552
#define HWY_RVV_SHIFT_VV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:903
#define HWY_RVV_STORE2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1483
#define HWY_RVV_SHIFT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:848
#define HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:327
#define HWY_RVV_VEC_FROM_MASK(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1167
#define HWY_RVV_IF_POW2_IN(D, min, max)
Definition: rvv-inl.h:39
#define HWY_RVV_IF_THEN_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1115
#define HWY_RVV_TABLE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2056
#define HWY_RVV_LOAD3(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1450
#define HWY_RVV_FOREACH_B(X_MACRO, NAME, OP)
Definition: rvv-inl.h:59
#define HWY_RVV_RETV_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:447
#define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:311
#define HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:301
#define HWY_RVV_STORE4(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1513
#define HWY_RVV_FOREACH_UI163264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:349
#define HWY_RVV_RETM_ARGM(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:463
#define HWY_RVV_RETV_ARGV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:440
#define HWY_RVV_STORE3(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1498
#define HWY_SPECIALIZE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:394
#define HWY_RVV_LOAD_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP)
#define HWY_RVV_ALL_TRUE(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1240
#define HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:287
#define HWY_RVV_SLIDE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1831
#define HWY_RVV_COMPRESS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2182
#define HWY_RVV_FOREACH(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:379
#define HWY_RVV_SET(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:472
#define HWY_RVV_DEMOTE_F(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1724
#define HWY_RVV_TRUNC(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:530
#define HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:313
#define HWY_RVV_CAST_I8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:581
#define HWY_RVV_RETM_ARGMM(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1096
#define HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:364
#define HWY_RVV_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1266
#define HWY_RVV_DEMOTE_I_TO_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1639
#define HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:291
#define HWY_RVV_IOTA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:701
#define HWY_RVV_CAST_VIRT_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:642
#define HWY_RVV_FOREACH_UI3264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:335
#define HWY_RVV_IF_THEN_ZERO_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1135
#define HWY_RVV_STORE_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP)
#define HWY_RVV_LOAD4(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1466
#define HWY_RVV_FIND_FIRST_TRUE(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1222
#define HWY_RVV_RETV_ARGV2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
#define HWY_RVV_EXT_VIRT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:552
#define HWY_RVV_CAST_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:596
#define HWY_RVV_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1315
#define HWY_RVV_GET_LANE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1950
#define HWY_RVV_RETM_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1029
#define HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:358
#define HWY_RVV_FOREACH_UI(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:375
#define HWY_RVV_CONVERT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1772
#define HWY_RVV_RETV_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:455
#define HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:289
#define HWY_RVV_COUNT_TRUE(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1252
#define HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:297
#define HWY_NAMESPACE
Definition: set_macros-inl.h:82
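HWY_NAMESPACE is redefined by set_macros-inl.h to a per-target name (e.g. N_RVV), so code placed inside it is compiled once per enabled target when the translation unit is re-included by foreach_target.h. A hedged outline of that idiom; mylib and MYLIB_INL_H_ are hypothetical names:

// mylib-inl.h, re-included once per target:
#if defined(MYLIB_INL_H_) == defined(HWY_TARGET_TOGGLE)
#ifdef MYLIB_INL_H_
#undef MYLIB_INL_H_
#else
#define MYLIB_INL_H_
#endif

#include "hwy/highway.h"

HWY_BEFORE_NAMESPACE();
namespace mylib {
namespace HWY_NAMESPACE {  // a different namespace for every target
namespace hn = hwy::HWY_NAMESPACE;

// Per-target helper: how many float lanes the current target provides.
size_t VectorLanes() { return hn::Lanes(hn::ScalableTag<float>()); }

}  // namespace HWY_NAMESPACE
}  // namespace mylib
HWY_AFTER_NAMESPACE();

#endif  // MYLIB_INL_H_ toggle guard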