Grok  10.0.3
find-inl.h
Go to the documentation of this file.
1 // Copyright 2022 Google LLC
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 // Per-target include guard
17 #if defined(HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_) == \
18  defined(HWY_TARGET_TOGGLE)
19 #ifdef HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_
20 #undef HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_
21 #else
22 #define HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_
23 #endif
24 
25 #include "hwy/highway.h"
26 
28 namespace hwy {
29 namespace HWY_NAMESPACE {
30 
31 // Returns index of the first element equal to `value` in `in[0, count)`, or
32 // `count` if not found.
33 template <class D, typename T = TFromD<D>>
34 size_t Find(D d, T value, const T* HWY_RESTRICT in, size_t count) {
35  const size_t N = Lanes(d);
36  const Vec<D> broadcasted = Set(d, value);
37 
38  size_t i = 0;
39  for (; i + N <= count; i += N) {
40  const intptr_t pos = FindFirstTrue(d, Eq(broadcasted, LoadU(d, in + i)));
41  if (pos >= 0) return i + static_cast<size_t>(pos);
42  }
43 
44  if (i != count) {
45 #if HWY_MEM_OPS_MIGHT_FAULT
46  // Scan single elements.
47  const CappedTag<T, 1> d1;
48  using V1 = Vec<decltype(d1)>;
49  const V1 broadcasted1 = Set(d1, GetLane(broadcasted));
50  for (; i < count; ++i) {
51  if (AllTrue(d1, Eq(broadcasted1, LoadU(d1, in + i)))) {
52  return i;
53  }
54  }
55 #else
56  const size_t remaining = count - i;
57  HWY_DASSERT(0 != remaining && remaining < N);
58  const Mask<D> mask = FirstN(d, remaining);
59  const Vec<D> v = MaskedLoad(mask, d, in + i);
60  // Apply mask so that we don't 'find' the zero-padding from MaskedLoad.
61  const intptr_t pos = FindFirstTrue(d, And(Eq(broadcasted, v), mask));
62  if (pos >= 0) return i + static_cast<size_t>(pos);
63 #endif // HWY_MEM_OPS_MIGHT_FAULT
64  }
65 
66  return count; // not found
67 }
68 
69 // Returns index of the first element in `in[0, count)` for which `func(d, vec)`
70 // returns true, otherwise `count`.
71 template <class D, class Func, typename T = TFromD<D>>
72 size_t FindIf(D d, const T* HWY_RESTRICT in, size_t count, const Func& func) {
73  const size_t N = Lanes(d);
74 
75  size_t i = 0;
76  for (; i + N <= count; i += N) {
77  const intptr_t pos = FindFirstTrue(d, func(d, LoadU(d, in + i)));
78  if (pos >= 0) return i + static_cast<size_t>(pos);
79  }
80 
81  if (i != count) {
82 #if HWY_MEM_OPS_MIGHT_FAULT
83  // Scan single elements.
84  const CappedTag<T, 1> d1;
85  for (; i < count; ++i) {
86  if (AllTrue(d1, func(d1, LoadU(d1, in + i)))) {
87  return i;
88  }
89  }
90 #else
91  const size_t remaining = count - i;
92  HWY_DASSERT(0 != remaining && remaining < N);
93  const Mask<D> mask = FirstN(d, remaining);
94  const Vec<D> v = MaskedLoad(mask, d, in + i);
95  // Apply mask so that we don't 'find' the zero-padding from MaskedLoad.
96  const intptr_t pos = FindFirstTrue(d, And(func(d, v), mask));
97  if (pos >= 0) return i + static_cast<size_t>(pos);
98 #endif // HWY_MEM_OPS_MIGHT_FAULT
99  }
100 
101  return count; // not found
102 }
103 
104 // NOLINTNEXTLINE(google-readability-namespace-comments)
105 } // namespace HWY_NAMESPACE
106 } // namespace hwy
108 
109 #endif // HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_
#define HWY_RESTRICT
Definition: base.h:61
#define HWY_DASSERT(condition)
Definition: base.h:191
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()
d
Definition: rvv-inl.h:1742
HWY_API bool AllTrue(const Full128< T > d, const Mask128< T > m)
Definition: arm_neon-inl.h:5305
size_t Find(D d, T value, const T *HWY_RESTRICT in, size_t count)
Definition: find-inl.h:34
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6301
HWY_API intptr_t FindFirstTrue(const Simd< T, N, 0 > d, const Mask128< T, N > mask)
Definition: arm_neon-inl.h:5280
HWY_API Mask128< T, N > FirstN(const Simd< T, N, 0 > d, size_t num)
Definition: arm_neon-inl.h:2409
constexpr HWY_API size_t Lanes(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:236
typename detail::CappedTagChecker< T, kLimit >::type CappedTag
Definition: ops/shared-inl.h:172
HWY_API Vec128< T, N > MaskedLoad(Mask128< T, N > m, Simd< T, N, 0 > d, const T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2711
svuint16_t Set(Simd< bfloat16_t, N, kPow2 > d, bfloat16_t arg)
Definition: arm_sve-inl.h:312
HWY_API Vec128< uint8_t > LoadU(Full128< uint8_t >, const uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2544
HWY_API Vec128< T, N > And(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1934
HWY_API TFromV< V > GetLane(const V v)
Definition: arm_neon-inl.h:1061
decltype(MaskFromVec(Zero(D()))) Mask
Definition: generic_ops-inl.h:38
N
Definition: rvv-inl.h:1742
size_t FindIf(D d, const T *HWY_RESTRICT in, size_t count, const Func &func)
Definition: find-inl.h:72
const vfloat64m1_t v
Definition: rvv-inl.h:1742
decltype(Zero(D())) Vec
Definition: generic_ops-inl.h:32
Definition: aligned_allocator.h:27
FuncOutput(*)(const void *, FuncInput) Func
Definition: nanobenchmark.h:105
#define HWY_NAMESPACE
Definition: set_macros-inl.h:82