Grok  10.0.3
test_util-inl.h
Go to the documentation of this file.
1 // Copyright 2019 Google LLC
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 // Target-specific helper functions for use by *_test.cc.
17 
18 #include <inttypes.h>
19 #include <stdint.h>
20 
21 #include "hwy/base.h"
22 #include "hwy/print-inl.h"
23 #include "hwy/tests/hwy_gtest.h"
24 #include "hwy/tests/test_util.h"
25 
26 // Per-target include guard
27 #if defined(HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_) == \
28  defined(HWY_TARGET_TOGGLE)
29 #ifdef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
30 #undef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
31 #else
32 #define HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
33 #endif
34 
36 namespace hwy {
37 namespace HWY_NAMESPACE {
38 
39 // Compare expected vector to vector.
40 // HWY_INLINE works around a Clang SVE compiler bug where all but the first
41 // 128 bits (the NEON register) of actual are zero.
42 template <class D, typename T = TFromD<D>, class V = Vec<D>>
43 HWY_INLINE void AssertVecEqual(D d, const T* expected, VecArg<V> actual,
44  const char* filename, const int line) {
45  const size_t N = Lanes(d);
46  auto actual_lanes = AllocateAligned<T>(N);
47  Store(actual, d, actual_lanes.get());
48 
49  const auto info = hwy::detail::MakeTypeInfo<T>();
50  const char* target_name = hwy::TargetName(HWY_TARGET);
51  hwy::detail::AssertArrayEqual(info, expected, actual_lanes.get(), N,
52  target_name, filename, line);
53 }
54 
55 // Compare expected lanes to vector.
56 // HWY_INLINE works around a Clang SVE compiler bug where all but the first
57 // 128 bits (the NEON register) of actual are zero.
58 template <class D, typename T = TFromD<D>, class V = Vec<D>>
59 HWY_INLINE void AssertVecEqual(D d, VecArg<V> expected, VecArg<V> actual,
60  const char* filename, int line) {
61  auto expected_lanes = AllocateAligned<T>(Lanes(d));
62  Store(expected, d, expected_lanes.get());
63  AssertVecEqual(d, expected_lanes.get(), actual, filename, line);
64 }
65 
66 // Only checks the valid mask elements (those whose index < Lanes(d)).
67 template <class D>
69  const char* filename, int line) {
70  // lvalues prevented MSAN failure in farm_sve.
71  const Vec<D> va = VecFromMask(d, a);
72  const Vec<D> vb = VecFromMask(d, b);
73  AssertVecEqual(d, va, vb, filename, line);
74 
75  const char* target_name = hwy::TargetName(HWY_TARGET);
76  AssertEqual(CountTrue(d, a), CountTrue(d, b), target_name, filename, line);
77  AssertEqual(AllTrue(d, a), AllTrue(d, b), target_name, filename, line);
78  AssertEqual(AllFalse(d, a), AllFalse(d, b), target_name, filename, line);
79 
80  const size_t N = Lanes(d);
81 #if HWY_TARGET == HWY_SCALAR
82  const Rebind<uint8_t, D> d8;
83 #else
84  const Repartition<uint8_t, D> d8;
85 #endif
86  const size_t N8 = Lanes(d8);
87  auto bits_a = AllocateAligned<uint8_t>(HWY_MAX(8, N8));
88  auto bits_b = AllocateAligned<uint8_t>(HWY_MAX(8, N8));
89  memset(bits_a.get(), 0, N8);
90  memset(bits_b.get(), 0, N8);
91  const size_t num_bytes_a = StoreMaskBits(d, a, bits_a.get());
92  const size_t num_bytes_b = StoreMaskBits(d, b, bits_b.get());
93  AssertEqual(num_bytes_a, num_bytes_b, target_name, filename, line);
94  size_t i = 0;
95  // First check whole bytes (if that many elements are still valid)
96  for (; i < N / 8; ++i) {
97  if (bits_a[i] != bits_b[i]) {
98  fprintf(stderr, "Mismatch in byte %" PRIu64 ": %d != %d\n",
99  static_cast<uint64_t>(i), bits_a[i], bits_b[i]);
100  Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
101  Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
102  hwy::Abort(filename, line, "Masks not equal");
103  }
104  }
105  // Then the valid bit(s) in the last byte.
106  const size_t remainder = N % 8;
107  if (remainder != 0) {
108  const int mask = (1 << remainder) - 1;
109  const int valid_a = bits_a[i] & mask;
110  const int valid_b = bits_b[i] & mask;
111  if (valid_a != valid_b) {
112  fprintf(stderr, "Mismatch in last byte %" PRIu64 ": %d != %d\n",
113  static_cast<uint64_t>(i), valid_a, valid_b);
114  Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
115  Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
116  hwy::Abort(filename, line, "Masks not equal");
117  }
118  }
119 }
120 
121 // Only sets valid elements (those whose index < Lanes(d)). This helps catch
122 // tests that are not masking off the (undefined) upper mask elements.
123 //
124 // TODO(janwas): with HWY_NOINLINE GCC zeros the upper half of AVX2 masks.
125 template <class D>
127  return FirstN(d, Lanes(d));
128 }
129 
130 template <class D>
132  const auto zero = Zero(RebindToSigned<D>());
133  return RebindMask(d, Lt(zero, zero));
134 }
135 
// Assertion macros for tests. Each forwards to the corresponding helper and
// appends the call site's __FILE__/__LINE__; the hwy:: helpers additionally
// receive the name of the target currently being compiled. The #ifndef ensures
// the macros are defined only once even if this header is included repeatedly.
#ifndef HWY_ASSERT_EQ

// Scalar equality.
#define HWY_ASSERT_EQ(expected, actual)                                      \
  hwy::AssertEqual(expected, actual, hwy::TargetName(HWY_TARGET), __FILE__,  \
                   __LINE__)

// Element-wise equality of two arrays with `count` elements.
#define HWY_ASSERT_ARRAY_EQ(expected, actual, count)                          \
  hwy::AssertArrayEqual(expected, actual, count, hwy::TargetName(HWY_TARGET), \
                        __FILE__, __LINE__)

// String equality.
#define HWY_ASSERT_STRING_EQ(expected, actual)                          \
  hwy::AssertStringEqual(expected, actual, hwy::TargetName(HWY_TARGET), \
                         __FILE__, __LINE__)

// Vector equality; `expected` may be a vector or a pointer to lanes.
#define HWY_ASSERT_VEC_EQ(d, expected, actual) \
  AssertVecEqual(d, expected, actual, __FILE__, __LINE__)

// Mask equality.
#define HWY_ASSERT_MASK_EQ(d, expected, actual) \
  AssertMaskEqual(d, expected, actual, __FILE__, __LINE__)

#endif  // HWY_ASSERT_EQ
157 
158 namespace detail {
159 
160 // Helpers for instantiating tests with combinations of lane types / counts.
161 
162 // Calls Test for each CappedTag<T, N> where N is in [kMinLanes, kMul * kMinArg]
163 // and the resulting Lanes() is in [min_lanes, max_lanes]. The upper bound
164 // is required to ensure capped vectors remain extendable. Implemented by
165 // recursively halving kMul until it is zero.
166 template <typename T, size_t kMul, size_t kMinArg, class Test>
168  static void Do(size_t min_lanes, size_t max_lanes) {
170 
171  // If we already don't have enough lanes, stop.
172  const size_t lanes = Lanes(d);
173  if (lanes < min_lanes) return;
174 
175  if (lanes <= max_lanes) {
176  Test()(T(), d);
177  }
178  ForeachCappedR<T, kMul / 2, kMinArg, Test>::Do(min_lanes, max_lanes);
179  }
180 };
181 
182 // Base case to stop the recursion.
183 template <typename T, size_t kMinArg, class Test>
184 struct ForeachCappedR<T, 0, kMinArg, Test> {
185  static void Do(size_t, size_t) {}
186 };
187 
188 #if HWY_HAVE_SCALABLE
189 
190 template <typename T>
191 constexpr int MinPow2() {
192  // Highway follows RVV LMUL in that the smallest fraction is 1/8th (encoded
193  // as kPow2 == -3). The fraction also must not result in zero lanes for the
194  // smallest possible vector size, which is 128 bits even on RISC-V (with the
195  // application processor profile).
196  return HWY_MAX(-3, -static_cast<int>(CeilLog2(16 / sizeof(T))));
197 }
198 
199 // Iterates kPow2 upward through +3.
200 template <typename T, int kPow2, int kAddPow2, class Test>
201 struct ForeachShiftR {
202  static void Do(size_t min_lanes) {
203  const ScalableTag<T, kPow2 + kAddPow2> d;
204 
205  // Precondition: [kPow2, 3] + kAddPow2 is a valid fraction of the minimum
206  // vector size, so we always have enough lanes, except ForGEVectors.
207  if (Lanes(d) >= min_lanes) {
208  Test()(T(), d);
209  } else {
210  fprintf(stderr, "%d lanes < %d: T=%d pow=%d\n",
211  static_cast<int>(Lanes(d)), static_cast<int>(min_lanes),
212  static_cast<int>(sizeof(T)), kPow2 + kAddPow2);
213  HWY_ASSERT(min_lanes != 1);
214  }
215 
216  ForeachShiftR<T, kPow2 + 1, kAddPow2, Test>::Do(min_lanes);
217  }
218 };
219 
220 // Base case to stop the recursion.
221 template <typename T, int kAddPow2, class Test>
222 struct ForeachShiftR<T, 4, kAddPow2, Test> {
223  static void Do(size_t) {}
224 };
225 #else
226 // ForeachCappedR already handled all possible sizes.
227 #endif // HWY_HAVE_SCALABLE
228 
229 } // namespace detail
230 
231 // These 'adapters' call a test for all possible N or kPow2 subject to
232 // constraints such as "vectors must be extendable" or "vectors >= 128 bits".
233 // They may be called directly, or via For*Types. Note that for an adapter C,
234 // `C<Test>(T())` does not call the test - the correct invocation is
235 // `C<Test>()(T())`, or preferably `ForAllTypes(C<Test>())`. We check at runtime
236 // that operator() is called to prevent such bugs. Note that this is not
237 // thread-safe, but that is fine because C are typically local variables.
238 
239 // Calls Test for all power of two N in [1, Lanes(d) >> kPow2]. This is for
240 // ops that widen their input, e.g. Combine (not supported by HWY_SCALAR).
241 template <class Test, int kPow2 = 1>
243  mutable bool called_ = false;
244 
245  public:
247  if (!called_) {
248  HWY_ABORT("Test is incorrect, ensure operator() is called");
249  }
250  }
251 
252  template <typename T>
253  void operator()(T /*unused*/) const {
254  called_ = true;
255  constexpr size_t kMaxCapped = HWY_LANES(T);
256  // Skip CappedTag that are already full vectors.
257  const size_t max_lanes = Lanes(ScalableTag<T>()) >> kPow2;
258  (void)kMaxCapped;
259  (void)max_lanes;
260 #if HWY_TARGET == HWY_SCALAR
261  // not supported
262 #else
263  detail::ForeachCappedR<T, (kMaxCapped >> kPow2), 1, Test>::Do(1, max_lanes);
264 #if HWY_TARGET == HWY_RVV
265  // For each [MinPow2, 3 - kPow2]; counter is [MinPow2 + kPow2, 3].
266  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, -kPow2, Test>::Do(1);
267 #elif HWY_HAVE_SCALABLE
268  // For each [MinPow2, 0 - kPow2]; counter is [MinPow2 + kPow2 + 3, 3].
269  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -kPow2 - 3,
270  Test>::Do(1);
271 #endif
272 #endif // HWY_SCALAR
273  }
274 };
275 
276 // Calls Test for all power of two N in [1 << kPow2, Lanes(d)]. This is for ops
277 // that narrow their input, e.g. UpperHalf.
278 template <class Test, int kPow2 = 1>
280  mutable bool called_ = false;
281 
282  public:
284  if (!called_) {
285  HWY_ABORT("Test is incorrect, ensure operator() is called");
286  }
287  }
288 
289  template <typename T>
290  void operator()(T /*unused*/) const {
291  called_ = true;
292  constexpr size_t kMinLanes = size_t{1} << kPow2;
293  constexpr size_t kMaxCapped = HWY_LANES(T);
294  // For shrinking, an upper limit is unnecessary.
295  constexpr size_t max_lanes = kMaxCapped;
296 
297  (void)kMinLanes;
298  (void)max_lanes;
299  (void)max_lanes;
300 #if HWY_TARGET == HWY_SCALAR
301  // not supported
302 #else
303  detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
304  kMinLanes, max_lanes);
305 #if HWY_TARGET == HWY_RVV
306  // For each [MinPow2 + kPow2, 3]; counter is [MinPow2 + kPow2, 3].
307  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, 0, Test>::Do(
308  kMinLanes);
309 #elif HWY_HAVE_SCALABLE
310  // For each [MinPow2 + kPow2, 0]; counter is [MinPow2 + kPow2 + 3, 3].
311  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -3, Test>::Do(
312  kMinLanes);
313 #endif
314 #endif // HWY_TARGET == HWY_SCALAR
315  }
316 };
317 
318 // Calls Test for all supported power of two vectors of at least kMinBits.
319 // Examples: AES or 64x64 require 128 bits, casts may require 64 bits.
320 template <size_t kMinBits, class Test>
322  mutable bool called_ = false;
323 
324  public:
326  if (!called_) {
327  HWY_ABORT("Test is incorrect, ensure operator() is called");
328  }
329  }
330 
331  template <typename T>
332  void operator()(T /*unused*/) const {
333  called_ = true;
334  constexpr size_t kMaxCapped = HWY_LANES(T);
335  constexpr size_t kMinLanes = kMinBits / 8 / sizeof(T);
336  // An upper limit is unnecessary.
337  constexpr size_t max_lanes = kMaxCapped;
338  (void)max_lanes;
339 #if HWY_TARGET == HWY_SCALAR
340  (void)kMinLanes; // not supported
341 #else
342  detail::ForeachCappedR<T, HWY_LANES(T) / kMinLanes, kMinLanes, Test>::Do(
343  kMinLanes, max_lanes);
344 #if HWY_TARGET == HWY_RVV
345  // Can be 0 (handled below) if kMinBits > 128.
346  constexpr size_t kRatio = 128 / kMinBits;
347  constexpr int kMinPow2 =
348  kRatio == 0 ? 0 : -static_cast<int>(CeilLog2(kRatio));
349  // For each [kMinPow2, 3]; counter is [kMinPow2, 3].
350  detail::ForeachShiftR<T, kMinPow2, 0, Test>::Do(kMinLanes);
351 #elif HWY_HAVE_SCALABLE
352  // Can be 0 (handled below) if kMinBits > 128.
353  constexpr size_t kRatio = 128 / kMinBits;
354  constexpr int kMinPow2 =
355  kRatio == 0 ? 0 : -static_cast<int>(CeilLog2(kRatio));
356  // For each [kMinPow2, 0]; counter is [kMinPow2 + 3, 3].
357  detail::ForeachShiftR<T, kMinPow2 + 3, -3, Test>::Do(kMinLanes);
358 #endif
359 #endif // HWY_TARGET == HWY_SCALAR
360  }
361 };
362 
363 template <class Test>
365 
366 // Calls Test for all N that can be promoted (not the same as Extendable because
367 // HWY_SCALAR has one lane). Also used for ZipLower, but not ZipUpper.
368 template <class Test, int kPow2 = 1>
370  mutable bool called_ = false;
371 
372  public:
374  if (!called_) {
375  HWY_ABORT("Test is incorrect, ensure operator() is called");
376  }
377  }
378 
379  template <typename T>
380  void operator()(T /*unused*/) const {
381  called_ = true;
382  constexpr size_t kFactor = size_t{1} << kPow2;
383  static_assert(kFactor >= 2 && kFactor * sizeof(T) <= sizeof(uint64_t), "");
384  constexpr size_t kMaxCapped = HWY_LANES(T);
385  constexpr size_t kMinLanes = kFactor;
386  // Skip CappedTag that are already full vectors.
387  const size_t max_lanes = Lanes(ScalableTag<T>()) >> kPow2;
388  (void)kMaxCapped;
389  (void)kMinLanes;
390  (void)max_lanes;
391 #if HWY_TARGET == HWY_SCALAR
393 #else
394  // TODO(janwas): call Extendable if kMinLanes check not required?
395  detail::ForeachCappedR<T, (kMaxCapped >> kPow2), 1, Test>::Do(kMinLanes,
396  max_lanes);
397 #if HWY_TARGET == HWY_RVV
398  // For each [MinPow2, 3 - kPow2]; counter is [MinPow2 + kPow2, 3].
399  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, -kPow2, Test>::Do(
400  kMinLanes);
401 #elif HWY_HAVE_SCALABLE
402  // For each [MinPow2, 0 - kPow2]; counter is [MinPow2 + kPow2 + 3, 3].
403  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -kPow2 - 3,
404  Test>::Do(kMinLanes);
405 #endif
406 #endif // HWY_SCALAR
407  }
408 };
409 
410 // Calls Test for all N that can be demoted (not the same as Shrinkable because
411 // HWY_SCALAR has one lane).
412 template <class Test, int kPow2 = 1>
414  mutable bool called_ = false;
415 
416  public:
418  if (!called_) {
419  HWY_ABORT("Test is incorrect, ensure operator() is called");
420  }
421  }
422 
423  template <typename T>
424  void operator()(T /*unused*/) const {
425  called_ = true;
426  constexpr size_t kMinLanes = size_t{1} << kPow2;
427  constexpr size_t kMaxCapped = HWY_LANES(T);
428  // For shrinking, an upper limit is unnecessary.
429  constexpr size_t max_lanes = kMaxCapped;
430 
431  (void)kMinLanes;
432  (void)max_lanes;
433  (void)max_lanes;
434 #if HWY_TARGET == HWY_SCALAR
436 #else
437  detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
438  kMinLanes, max_lanes);
439 
440 // TODO(janwas): call Extendable if kMinLanes check not required?
441 #if HWY_TARGET == HWY_RVV
442  // For each [MinPow2 + kPow2, 3]; counter is [MinPow2 + kPow2, 3].
443  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, 0, Test>::Do(
444  kMinLanes);
445 #elif HWY_HAVE_SCALABLE
446  // For each [MinPow2 + kPow2, 0]; counter is [MinPow2 + kPow2 + 3, 3].
447  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -3, Test>::Do(
448  kMinLanes);
449 #endif
450 #endif // HWY_TARGET == HWY_SCALAR
451  }
452 };
453 
454 // For LowerHalf/Quarter.
455 template <class Test, int kPow2 = 1>
457  mutable bool called_ = false;
458 
459  public:
461  if (!called_) {
462  HWY_ABORT("Test is incorrect, ensure operator() is called");
463  }
464  }
465 
466  template <typename T>
467  void operator()(T /*unused*/) const {
468  called_ = true;
469 #if HWY_TARGET == HWY_SCALAR
471 #else
472  constexpr size_t kMinLanes = size_t{1} << kPow2;
473  // For shrinking, an upper limit is unnecessary.
474  constexpr size_t kMaxCapped = HWY_LANES(T);
475  detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
476  kMinLanes, kMaxCapped);
477 
478 // TODO(janwas): call Extendable if kMinLanes check not required?
479 #if HWY_TARGET == HWY_RVV
480  // For each [MinPow2 + kPow2, 3]; counter is [MinPow2 + kPow2, 3].
481  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, 0, Test>::Do(
482  kMinLanes);
483 #elif HWY_HAVE_SCALABLE
484  // For each [MinPow2 + kPow2, 0]; counter is [MinPow2 + kPow2 + 3, 3].
485  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -3, Test>::Do(
486  kMinLanes);
487 #endif
488 #endif // HWY_TARGET == HWY_SCALAR
489  }
490 };
491 
492 // Calls Test for all power of two N in [1, Lanes(d)]. This is the default
493 // for ops that do not narrow nor widen their input, nor require 128 bits.
494 template <class Test>
496  mutable bool called_ = false;
497 
498  public:
500  if (!called_) {
501  HWY_ABORT("Test is incorrect, ensure operator() is called");
502  }
503  }
504 
505  template <typename T>
506  void operator()(T t) const {
507  called_ = true;
508 #if HWY_TARGET == HWY_SCALAR
509  (void)t;
511 #else
513 #endif
514  }
515 };
516 
517 // Type lists to shorten call sites:
518 
// Invokes func once per signed integer lane type, in order of increasing
// width. int64_t is only visited when HWY_HAVE_INTEGER64 is nonzero.
template <class Func>
void ForSignedTypes(const Func& func) {
  func(int8_t{});
  func(int16_t{});
  func(int32_t{});
#if HWY_HAVE_INTEGER64
  func(int64_t{});
#endif
}
528 
// Invokes func once per unsigned integer lane type, in order of increasing
// width. uint64_t is only visited when HWY_HAVE_INTEGER64 is nonzero.
template <class Func>
void ForUnsignedTypes(const Func& func) {
  func(uint8_t{});
  func(uint16_t{});
  func(uint32_t{});
#if HWY_HAVE_INTEGER64
  func(uint64_t{});
#endif
}
538 
539 template <class Func>
540 void ForIntegerTypes(const Func& func) {
541  ForSignedTypes(func);
542  ForUnsignedTypes(func);
543 }
544 
// Invokes func for float, and also for double when HWY_HAVE_FLOAT64 is
// nonzero.
template <class Func>
void ForFloatTypes(const Func& func) {
  func(float{});
#if HWY_HAVE_FLOAT64
  func(double{});
#endif
}
552 
553 template <class Func>
554 void ForAllTypes(const Func& func) {
555  ForIntegerTypes(func);
556  ForFloatTypes(func);
557 }
558 
// Invokes func for the 8-bit integer types: unsigned first, then signed.
template <class Func>
void ForUI8(const Func& func) {
  func(uint8_t{});
  func(int8_t{});
}
564 
// Invokes func for the 16-bit integer types: unsigned first, then signed.
template <class Func>
void ForUI16(const Func& func) {
  func(uint16_t{});
  func(int16_t{});
}
570 
571 template <class Func>
572 void ForUIF16(const Func& func) {
573  ForUI16(func);
574 #if HWY_HAVE_FLOAT16
575  func(float16_t());
576 #endif
577 }
578 
// Invokes func for the 32-bit integer types: unsigned first, then signed.
template <class Func>
void ForUI32(const Func& func) {
  func(uint32_t{});
  func(int32_t{});
}
584 
585 template <class Func>
586 void ForUIF32(const Func& func) {
587  ForUI32(func);
588  func(float());
589 }
590 
// Invokes func for the 64-bit integer types (unsigned first, then signed).
// Does nothing unless HWY_HAVE_INTEGER64 is nonzero.
template <class Func>
void ForUI64(const Func& func) {
#if HWY_HAVE_INTEGER64
  func(uint64_t{});
  func(int64_t{});
#endif
}
598 
599 template <class Func>
600 void ForUIF64(const Func& func) {
601  ForUI64(func);
602 #if HWY_HAVE_FLOAT64
603  func(double());
604 #endif
605 }
606 
607 template <class Func>
608 void ForUI3264(const Func& func) {
609  ForUI32(func);
610  ForUI64(func);
611 }
612 
613 template <class Func>
614 void ForUIF3264(const Func& func) {
615  ForUIF32(func);
616  ForUIF64(func);
617 }
618 
619 template <class Func>
620 void ForUI163264(const Func& func) {
621  ForUI16(func);
622  ForUI3264(func);
623 }
624 
625 template <class Func>
626 void ForUIF163264(const Func& func) {
627  ForUIF16(func);
628  ForUIF3264(func);
629 }
630 
631 // For tests that involve loops, adjust the trip count so that emulated tests
632 // finish quickly (but always at least 2 iterations to ensure some diversity).
633 constexpr size_t AdjustedReps(size_t max_reps) {
634 #if HWY_ARCH_RVV
635  return HWY_MAX(max_reps / 32, 2);
636 #elif HWY_IS_DEBUG_BUILD
637  return HWY_MAX(max_reps / 8, 2);
638 #elif HWY_ARCH_ARM
639  return HWY_MAX(max_reps / 4, 2);
640 #else
641  return HWY_MAX(max_reps, 2);
642 #endif
643 }
644 
// Same idea as AdjustedReps, but for loops whose trip count is 1 << max_pow2:
// returns a reduced exponent (minus 4 for HWY_ARCH_RVV, minus 1 for debug
// builds and HWY_ARCH_ARM, unchanged otherwise).
constexpr size_t AdjustedLog2Reps(size_t max_pow2) {
  // If the reduction would go below zero, the original value is returned.
#if HWY_ARCH_RVV
  return (max_pow2 < 4) ? max_pow2 : max_pow2 - 4;
#elif HWY_IS_DEBUG_BUILD || HWY_ARCH_ARM
  return (max_pow2 < 1) ? max_pow2 : max_pow2 - 1;
#else
  return max_pow2;
#endif
}
658 
659 // NOLINTNEXTLINE(google-readability-namespace-comments)
660 } // namespace HWY_NAMESPACE
661 } // namespace hwy
663 
664 #endif // per-target include guard
#define HWY_MAX(a, b)
Definition: base.h:126
#define HWY_NOINLINE
Definition: base.h:63
#define HWY_MIN(a, b)
Definition: base.h:125
#define HWY_ABORT(format,...)
Definition: base.h:141
#define HWY_INLINE
Definition: base.h:62
#define HWY_ASSERT(condition)
Definition: base.h:145
Definition: test_util-inl.h:413
~ForDemoteVectors()
Definition: test_util-inl.h:417
void operator()(T) const
Definition: test_util-inl.h:424
bool called_
Definition: test_util-inl.h:414
Definition: test_util-inl.h:242
void operator()(T) const
Definition: test_util-inl.h:253
bool called_
Definition: test_util-inl.h:243
~ForExtendableVectors()
Definition: test_util-inl.h:246
Definition: test_util-inl.h:321
bool called_
Definition: test_util-inl.h:322
~ForGEVectors()
Definition: test_util-inl.h:325
void operator()(T) const
Definition: test_util-inl.h:332
Definition: test_util-inl.h:456
~ForHalfVectors()
Definition: test_util-inl.h:460
bool called_
Definition: test_util-inl.h:457
void operator()(T) const
Definition: test_util-inl.h:467
Definition: test_util-inl.h:495
bool called_
Definition: test_util-inl.h:496
void operator()(T t) const
Definition: test_util-inl.h:506
~ForPartialVectors()
Definition: test_util-inl.h:499
Definition: test_util-inl.h:369
~ForPromoteVectors()
Definition: test_util-inl.h:373
bool called_
Definition: test_util-inl.h:370
void operator()(T) const
Definition: test_util-inl.h:380
Definition: test_util-inl.h:279
void operator()(T) const
Definition: test_util-inl.h:290
bool called_
Definition: test_util-inl.h:280
~ForShrinkableVectors()
Definition: test_util-inl.h:283
#define HWY_TARGET
Definition: detect_targets.h:341
d
Definition: rvv-inl.h:1742
V VecArg
Definition: ops/shared-inl.h:306
HWY_API Mask128< TTo, N > RebindMask(Simd< TTo, N, 0 > dto, Mask128< TFrom, N > m)
Definition: arm_neon-inl.h:2189
constexpr size_t AdjustedReps(size_t max_reps)
Definition: test_util-inl.h:633
HWY_API bool AllTrue(const Full128< T > d, const Mask128< T > m)
Definition: arm_neon-inl.h:5305
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6309
void ForUIF32(const Func &func)
Definition: test_util-inl.h:586
void ForUI163264(const Func &func)
Definition: test_util-inl.h:620
HWY_API Mask128< T, N > FirstN(const Simd< T, N, 0 > d, size_t num)
Definition: arm_neon-inl.h:2409
HWY_API size_t StoreMaskBits(Simd< T, N, 0 >, const Mask128< T, N > mask, uint8_t *bits)
Definition: arm_neon-inl.h:5290
constexpr HWY_API size_t Lanes(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:236
void ForUIF3264(const Func &func)
Definition: test_util-inl.h:614
void ForUIF163264(const Func &func)
Definition: test_util-inl.h:626
constexpr size_t AdjustedLog2Reps(size_t max_pow2)
Definition: test_util-inl.h:646
typename detail::CappedTagChecker< T, kLimit >::type CappedTag
Definition: ops/shared-inl.h:172
void ForUI32(const Func &func)
Definition: test_util-inl.h:580
void ForAllTypes(const Func &func)
Definition: test_util-inl.h:554
Rebind< MakeSigned< TFromD< D > >, D > RebindToSigned
Definition: ops/shared-inl.h:198
void ForFloatTypes(const Func &func)
Definition: test_util-inl.h:546
void Print(const D d, const char *caption, VecArg< V > v, size_t lane_u=0, size_t max_lanes=7)
Definition: print-inl.h:39
HWY_API size_t CountTrue(Full128< T >, const Mask128< T > mask)
Definition: arm_neon-inl.h:5269
HWY_API Vec128< T, N > VecFromMask(Simd< T, N, 0 > d, const Mask128< T, N > v)
Definition: arm_neon-inl.h:2182
HWY_INLINE void AssertVecEqual(D d, const T *expected, VecArg< V > actual, const char *filename, const int line)
Definition: test_util-inl.h:43
void ForIntegerTypes(const Func &func)
Definition: test_util-inl.h:540
HWY_API Vec128< T, N > Load(Simd< T, N, 0 > d, const T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2706
void ForUI8(const Func &func)
Definition: test_util-inl.h:560
void ForUI3264(const Func &func)
Definition: test_util-inl.h:608
typename detail::ScalableTagChecker< T, kPow2 >::type ScalableTag
Definition: ops/shared-inl.h:161
HWY_API bool AllFalse(const Simd< T, N, 0 > d, const Mask128< T, N > m)
Definition: arm_neon-inl.h:5299
void ForUIF64(const Func &func)
Definition: test_util-inl.h:600
void ForUI16(const Func &func)
Definition: test_util-inl.h:566
typename D::template Rebind< T > Rebind
Definition: ops/shared-inl.h:195
HWY_API Vec128< T, N > Zero(Simd< T, N, 0 > d)
Definition: arm_neon-inl.h:1011
void ForUI64(const Func &func)
Definition: test_util-inl.h:592
void ForSignedTypes(const Func &func)
Definition: test_util-inl.h:520
HWY_NOINLINE void AssertMaskEqual(D d, VecArg< Mask< D >> a, VecArg< Mask< D >> b, const char *filename, int line)
Definition: test_util-inl.h:68
void ForUIF16(const Func &func)
Definition: test_util-inl.h:572
typename D::template Repartition< T > Repartition
Definition: ops/shared-inl.h:206
decltype(MaskFromVec(Zero(D()))) Mask
Definition: generic_ops-inl.h:38
N
Definition: rvv-inl.h:1742
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2882
HWY_INLINE Mask< D > MaskFalse(const D d)
Definition: test_util-inl.h:131
HWY_INLINE Mask< D > MaskTrue(const D d)
Definition: test_util-inl.h:126
void ForUnsignedTypes(const Func &func)
Definition: test_util-inl.h:530
decltype(Zero(D())) Vec
Definition: generic_ops-inl.h:32
HWY_TEST_DLLEXPORT void AssertArrayEqual(const TypeInfo &info, const void *expected_void, const void *actual_void, size_t N, const char *target_name, const char *filename, int line)
Definition: aligned_allocator.h:27
FuncOutput(*)(const void *, FuncInput) Func
Definition: nanobenchmark.h:105
static HWY_MAYBE_UNUSED const char * TargetName(uint32_t target)
Definition: targets.h:77
constexpr size_t CeilLog2(TI x)
Definition: base.h:777
HWY_INLINE void AssertEqual(const T expected, const T actual, const char *target_name, const char *filename, int line, size_t lane=0)
Definition: test_util.h:151
HWY_DLLEXPORT HWY_NORETURN void int line
Definition: base.h:848
#define HWY_LANES(T)
Definition: set_macros-inl.h:85
#define HWY_NAMESPACE
Definition: set_macros-inl.h:82
static void Do(size_t, size_t)
Definition: test_util-inl.h:185
Definition: test_util-inl.h:167
static void Do(size_t min_lanes, size_t max_lanes)
Definition: test_util-inl.h:168
Definition: base.h:246
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()