17 #if defined(HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_) == \
18 defined(HWY_TARGET_TOGGLE)
19 #ifdef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
20 #undef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
22 #define HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
51 template <
class D,
class Func,
typename T = TFromD<D>>
54 using TU =
TFromD<decltype(du)>;
58 Vec<decltype(du)> vidx =
Iota(du, 0);
59 for (; idx +
N <= count; idx +=
N) {
61 vidx =
Add(vidx,
Set(du,
static_cast<TU
>(
N)));
67 #if HWY_MEM_OPS_MIGHT_FAULT
71 for (; idx < count; ++idx) {
72 StoreU(func(d1,
Set(du1,
static_cast<TU
>(idx))), d1, out + idx);
75 const size_t remaining = count - idx;
84 template <
class D,
class Func,
typename T = TFromD<D>>
89 for (; idx +
N <= count; idx +=
N) {
97 #if HWY_MEM_OPS_MIGHT_FAULT
100 for (; idx < count; ++idx) {
101 using V1 =
Vec<decltype(d1)>;
102 const V1
v =
LoadU(d1, inout + idx);
103 StoreU(func(d1,
v), d1, inout + idx);
106 const size_t remaining = count - idx;
116 template <
class D,
class Func,
typename T = TFromD<D>>
122 for (; idx +
N <= count; idx +=
N) {
131 #if HWY_MEM_OPS_MIGHT_FAULT
134 for (; idx < count; ++idx) {
135 using V1 =
Vec<decltype(d1)>;
136 const V1
v =
LoadU(d1, inout + idx);
137 const V1 v1 =
LoadU(d1, in1 + idx);
138 StoreU(func(d1,
v, v1), d1, inout + idx);
141 const size_t remaining = count - idx;
152 template <
class D,
class Func,
typename T = TFromD<D>>
159 for (; idx +
N <= count; idx +=
N) {
163 StoreU(func(
d,
v, v1, v2),
d, inout + idx);
169 #if HWY_MEM_OPS_MIGHT_FAULT
172 for (; idx < count; ++idx) {
173 using V1 =
Vec<decltype(d1)>;
174 const V1
v =
LoadU(d1, inout + idx);
175 const V1 v1 =
LoadU(d1, in1 + idx);
176 const V1 v2 =
LoadU(d1, in2 + idx);
177 StoreU(func(d1,
v, v1, v2), d1, inout + idx);
180 const size_t remaining = count - idx;
190 template <
class D,
typename T = TFromD<D>>
197 for (; idx +
N <= count; idx +=
N) {
205 #if HWY_MEM_OPS_MIGHT_FAULT
208 const Vec<decltype(d1)> old_v1 =
Set(d1, old_t);
209 const Vec<decltype(d1)> new_v1 =
Set(d1, new_t);
210 for (; idx < count; ++idx) {
211 using V1 =
Vec<decltype(d1)>;
212 const V1 v1 =
LoadU(d1, inout + idx);
216 const size_t remaining = count - idx;
224 template <
class D,
class Func,
typename T = TFromD<D>>
231 for (; idx +
N <= count; idx +=
N) {
239 #if HWY_MEM_OPS_MIGHT_FAULT
242 const Vec<decltype(d1)> new_v1 =
Set(d1, new_t);
243 for (; idx < count; ++idx) {
244 using V1 =
Vec<decltype(d1)>;
245 const V1
v =
LoadU(d1, inout + idx);
249 const size_t remaining = count - idx;
#define HWY_RESTRICT
Definition: base.h:61
#define HWY_DASSERT(condition)
Definition: base.h:191
#define HWY_UNLIKELY(expr)
Definition: base.h:67
d
Definition: rvv-inl.h:1742
void Generate(D d, T *HWY_RESTRICT out, size_t count, const Func &func)
Definition: transform-inl.h:52
void ReplaceIf(D d, T *HWY_RESTRICT inout, size_t count, T new_t, const Func &func)
Definition: transform-inl.h:225
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6301
HWY_API Mask128< T, N > FirstN(const Simd< T, N, 0 > d, size_t num)
Definition: arm_neon-inl.h:2409
constexpr HWY_API size_t Lanes(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:236
Rebind< MakeUnsigned< TFromD< D > >, D > RebindToUnsigned
Definition: ops/shared-inl.h:200
typename detail::CappedTagChecker< T, kLimit >::type CappedTag
Definition: ops/shared-inl.h:172
void Transform2(D d, T *HWY_RESTRICT inout, size_t count, const T *HWY_RESTRICT in1, const T *HWY_RESTRICT in2, const Func &func)
Definition: transform-inl.h:153
HWY_API Vec128< T, N > MaskedLoad(Mask128< T, N > m, Simd< T, N, 0 > d, const T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2711
HWY_API void BlendedStore(Vec128< T, N > v, Mask128< T, N > m, Simd< T, N, 0 > d, T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2887
HWY_API V Add(V a, V b)
Definition: arm_neon-inl.h:6274
HWY_API Vec128< T, N > IfThenElse(const Mask128< T, N > mask, const Vec128< T, N > yes, const Vec128< T, N > no)
Definition: emu128-inl.h:325
HWY_API void StoreU(const Vec128< uint8_t > v, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2725
void Replace(D d, T *HWY_RESTRICT inout, size_t count, T new_t, T old_t)
Definition: transform-inl.h:191
svuint16_t Set(Simd< bfloat16_t, N, kPow2 > d, bfloat16_t arg)
Definition: arm_sve-inl.h:312
Vec128< T, N > Iota(const Simd< T, N, 0 > d, const T2 first)
Definition: arm_neon-inl.h:1035
void Transform(D d, T *HWY_RESTRICT inout, size_t count, const Func &func)
Definition: transform-inl.h:85
HWY_API Vec128< uint8_t > LoadU(Full128< uint8_t >, const uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2544
decltype(MaskFromVec(Zero(D()))) Mask
Definition: generic_ops-inl.h:38
N
Definition: rvv-inl.h:1742
void Transform1(D d, T *HWY_RESTRICT inout, size_t count, const T *HWY_RESTRICT in1, const Func &func)
Definition: transform-inl.h:117
const vfloat64m1_t v
Definition: rvv-inl.h:1742
typename D::T TFromD
Definition: ops/shared-inl.h:191
decltype(Zero(D())) Vec
Definition: generic_ops-inl.h:32
Definition: aligned_allocator.h:27
FuncOutput(*)(const void *, FuncInput) Func
Definition: nanobenchmark.h:105
#define HWY_NAMESPACE
Definition: set_macros-inl.h:82