Grok  10.0.3
targets.h
Go to the documentation of this file.
1 // Copyright 2020 Google LLC
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #ifndef HIGHWAY_HWY_TARGETS_H_
17 #define HIGHWAY_HWY_TARGETS_H_
18 
19 #include <vector>
20 
21 // For SIMD module implementations and their callers. Defines which targets to
22 // generate and call.
23 
24 #include "hwy/base.h"
25 #include "hwy/detect_targets.h"
26 #include "hwy/highway_export.h"
27 
28 #if !HWY_ARCH_RVV
29 #include <atomic>
30 #endif
31 
32 namespace hwy {
33 
34 // Returns bitfield of enabled targets that are supported on this CPU; there is
35 // always at least one such target, hence the return value is never 0. The
36 // targets returned may change after calling DisableTargets. This function is
37 // always defined, but the HWY_SUPPORTED_TARGETS wrapper may allow eliding
38 // calls to it if there is only a single target enabled.
40 
// Expands to a runtime call to hwy::SupportedTargets() when more than one
// target bit is set in HWY_TARGETS. If clearing the lowest set bit leaves
// zero (i.e. there is a single target), expands to the literal HWY_TARGETS
// instead so that no function call is required.
#if (HWY_TARGETS & (HWY_TARGETS - 1)) != 0
#define HWY_SUPPORTED_TARGETS hwy::SupportedTargets()
#else
#define HWY_SUPPORTED_TARGETS HWY_TARGETS
#endif
47 
48 // Subsequent SupportedTargets will not return targets whose bit(s) are set in
49 // `disabled_targets`. Exception: if SupportedTargets would return 0, it will
50 // instead return HWY_STATIC_TARGET (there must always be one target to call).
51 //
52 // This function is useful for disabling targets known to be buggy, or if the
53 // best available target is undesirable (perhaps due to throttling or memory
54 // bandwidth limitations). Use SetSupportedTargetsForTest instead of this
55 // function for iteratively enabling specific targets for testing.
56 HWY_DLLEXPORT void DisableTargets(uint32_t disabled_targets);
57 
58 // Subsequent SupportedTargets will return the given set of targets, except
59 // those disabled via DisableTargets. Call with a mask of 0 to disable the mock
60 // and return to the normal SupportedTargets behavior. Used to run tests for
61 // all targets.
63 
64 // Return the list of targets in HWY_TARGETS supported by the CPU as a list of
65 // individual HWY_* target macros such as HWY_SCALAR or HWY_NEON. This list
66 // is affected by the current SetSupportedTargetsForTest() mock if any.
67 HWY_INLINE std::vector<uint32_t> SupportedAndGeneratedTargets() {
68  std::vector<uint32_t> ret;
69  for (uint32_t targets = SupportedTargets() & HWY_TARGETS; targets != 0;
70  targets = targets & (targets - 1)) {
71  uint32_t current_target = targets & ~(targets - 1);
72  ret.push_back(current_target);
73  }
74  return ret;
75 }
76 
77 static inline HWY_MAYBE_UNUSED const char* TargetName(uint32_t target) {
78  switch (target) {
79 #if HWY_ARCH_X86
80  case HWY_SSSE3:
81  return "SSSE3";
82  case HWY_SSE4:
83  return "SSE4";
84  case HWY_AVX2:
85  return "AVX2";
86  case HWY_AVX3:
87  return "AVX3";
88  case HWY_AVX3_DL:
89  return "AVX3_DL";
90 #endif
91 
92 #if HWY_ARCH_ARM
93  case HWY_SVE2_128:
94  return "SVE2_128";
95  case HWY_SVE_256:
96  return "SVE_256";
97  case HWY_SVE2:
98  return "SVE2";
99  case HWY_SVE:
100  return "SVE";
101  case HWY_NEON:
102  return "Neon";
103 #endif
104 
105 #if HWY_ARCH_PPC
106  case HWY_PPC8:
107  return "Power8";
108 #endif
109 
110 #if HWY_ARCH_WASM
111  case HWY_WASM:
112  return "Wasm";
113  case HWY_WASM_EMU256:
114  return "Wasm2";
115 #endif
116 
117 #if HWY_ARCH_RVV
118  case HWY_RVV:
119  return "RVV";
120 #endif
121 
122  case HWY_EMU128:
123  return "Emu128";
124  case HWY_SCALAR:
125  return "Scalar";
126 
127  default:
128  return "Unknown"; // must satisfy gtest IsValidParamName()
129  }
130 }
131 
132 // The maximum number of dynamic targets on any architecture is defined by
133 // HWY_MAX_DYNAMIC_TARGETS and depends on the arch.
134 
135 // For the ChosenTarget mask and index we use a different bit arrangement than
136 // in the HWY_TARGETS mask. Only the targets involved in the current
137 // architecture are used in this mask, and therefore only the least significant
138 // (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the uint32_t mask are used. The least
139 // significant bit is set when the mask is not initialized, the next
140 // HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the
141 // HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to
142 // that position and the next more significant bit is used for HWY_SCALAR (if
143 // HWY_COMPILE_ONLY_SCALAR is defined) or HWY_EMU128. Because of this we need to
144 // define equivalent values for HWY_TARGETS in this representation.
145 // This mask representation allows using ctz() on this mask to obtain a small
146 // number that's used as an index of the table for dynamic dispatch. In this
147 // way the first entry is used when the mask is uninitialized, the following
148 // HWY_MAX_DYNAMIC_TARGETS are for dynamic dispatch and the last one is for
149 // scalar.
150 
151 // The HWY_SCALAR/HWY_EMU128 bit in the ChosenTarget mask format.
// This is bit (HWY_MAX_DYNAMIC_TARGETS + 1): bit 0 is the "mask not
// initialized" flag and bits 1..HWY_MAX_DYNAMIC_TARGETS hold the dynamic
// targets, so this is the next bit above them. HWY_MAX_DYNAMIC_TARGETS is
// defined further down in this file, which works because a macro body is only
// expanded where the macro is used.
152 #define HWY_CHOSEN_TARGET_MASK_SCALAR (1u << (HWY_MAX_DYNAMIC_TARGETS + 1))
153 
154 // Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
155 // current architecture.
// Steps: shift X right so that bit (HWY_HIGHEST_TARGET_BIT + 1 -
// HWY_MAX_DYNAMIC_TARGETS) lands at bit 0, keep only the low
// HWY_MAX_DYNAMIC_TARGETS bits, then shift left by one so that bit 0 stays
// clear. The result therefore never contains the "mask not initialized" bit
// nor the scalar bit.
156 #define HWY_CHOSEN_TARGET_SHIFT(X) \
157  ((((X) >> (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)) & \
158  ((1u << HWY_MAX_DYNAMIC_TARGETS) - 1)) \
159  << 1)
160 
161 // The HWY_TARGETS mask in the ChosenTarget mask format.
// Combines the shifted compile-time HWY_TARGETS with the scalar bit and with
// bit 0 (1u), so both of those bits are always part of this mask.
162 #define HWY_CHOSEN_TARGET_MASK_TARGETS \
163  (HWY_CHOSEN_TARGET_SHIFT(HWY_TARGETS) | HWY_CHOSEN_TARGET_MASK_SCALAR | 1u)
164 
// Per-architecture configuration of the dynamic dispatch table. Each branch
// defines HWY_MAX_DYNAMIC_TARGETS (number of table slots for dynamic targets),
// HWY_HIGHEST_TARGET_BIT (taken from the matching HWY_HIGHEST_TARGET_BIT_*
// macro) and, except for the final #else branch, HWY_CHOOSE_TARGET_LIST.
165 #if HWY_ARCH_X86
166 // Maximum number of dynamic targets, changing this value is an ABI incompatible
167 // change
168 #define HWY_MAX_DYNAMIC_TARGETS 10
169 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_X86
170 // These must match the order in which the HWY_TARGETS are defined
171 // starting by the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
172 // HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
173 // HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry
174 // corresponds to the best target. Don't include a "," at the end of the list.
// A nullptr entry is a placeholder for a slot that is either marked reserved
// or names a target for which this list has no HWY_CHOOSE_* macro.
175 #define HWY_CHOOSE_TARGET_LIST(func_name) \
176  nullptr, /* reserved */ \
177  nullptr, /* reserved */ \
178  nullptr, /* reserved */ \
179  HWY_CHOOSE_AVX3_DL(func_name), /* AVX3_DL */ \
180  HWY_CHOOSE_AVX3(func_name), /* AVX3 */ \
181  HWY_CHOOSE_AVX2(func_name), /* AVX2 */ \
182  nullptr, /* AVX */ \
183  HWY_CHOOSE_SSE4(func_name), /* SSE4 */ \
184  HWY_CHOOSE_SSSE3(func_name), /* SSSE3 */ \
185  nullptr /* SSE3 or SSE2 */
186 
187 #elif HWY_ARCH_ARM
188 // See HWY_ARCH_X86 above for details.
// Eight entries, matching HWY_MAX_DYNAMIC_TARGETS for this branch.
189 #define HWY_MAX_DYNAMIC_TARGETS 8
190 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM
191 #define HWY_CHOOSE_TARGET_LIST(func_name) \
192  nullptr, /* reserved */ \
193  nullptr, /* reserved */ \
194  HWY_CHOOSE_SVE2_128(func_name), /* SVE2 128-bit */ \
195  HWY_CHOOSE_SVE_256(func_name), /* SVE 256-bit */ \
196  HWY_CHOOSE_SVE2(func_name), /* SVE2 */ \
197  HWY_CHOOSE_SVE(func_name), /* SVE */ \
198  nullptr, /* reserved */ \
199  HWY_CHOOSE_NEON(func_name) /* NEON */
200 
201 #elif HWY_ARCH_PPC
202 // See HWY_ARCH_X86 above for details.
// Three entries, matching HWY_MAX_DYNAMIC_TARGETS for this branch.
203 #define HWY_MAX_DYNAMIC_TARGETS 3
204 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC
205 #define HWY_CHOOSE_TARGET_LIST(func_name) \
206  nullptr, /* reserved */ \
207  HWY_CHOOSE_PPC8(func_name), /* PPC8 */ \
208  nullptr /* VSX or AltiVec */
209 
210 #elif HWY_ARCH_WASM
211 // See HWY_ARCH_X86 above for details.
// Four entries, matching HWY_MAX_DYNAMIC_TARGETS for this branch.
212 #define HWY_MAX_DYNAMIC_TARGETS 4
213 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM
214 #define HWY_CHOOSE_TARGET_LIST(func_name) \
215  nullptr, /* reserved */ \
216  nullptr, /* reserved */ \
217  HWY_CHOOSE_WASM2(func_name), /* WASM2 */ \
218  HWY_CHOOSE_WASM(func_name) /* WASM */
219 
220 #elif HWY_ARCH_RVV
221 // See HWY_ARCH_X86 above for details.
// Four entries, matching HWY_MAX_DYNAMIC_TARGETS for this branch.
222 #define HWY_MAX_DYNAMIC_TARGETS 4
223 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV
224 #define HWY_CHOOSE_TARGET_LIST(func_name) \
225  nullptr, /* reserved */ \
226  nullptr, /* reserved */ \
227  nullptr, /* reserved */ \
228  HWY_CHOOSE_RVV(func_name) /* RVV */
229 
230 #else
231 // Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
232 // still creating single-entry tables in HWY_EXPORT to ensure portability.
// Note: no HWY_CHOOSE_TARGET_LIST is defined in this branch.
233 #define HWY_MAX_DYNAMIC_TARGETS 1
234 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR
235 #endif
236 
237 // Bitfield of supported and enabled targets. The format differs from that of
238 // HWY_TARGETS; the lowest bit governs the first function pointer (which is
239 // special in that it calls FunctionCache, then Update, then dispatches to the
240 // actual implementation) in the tables created by HWY_EXPORT. Monostate (see
241 // GetChosenTarget), thread-safe except on RVV.
242 struct ChosenTarget {
243  public:
244  // Reset bits according to `targets` (typically the return value of
245  // SupportedTargets()). Postcondition: IsInitialized() == true.
246  void Update(uint32_t targets) {
247  // These are `targets` shifted downwards, see above. Also include SCALAR
248  // (corresponds to the last entry in the function table) as fallback.
250  }
251 
252  // Reset to the uninitialized state, so that FunctionCache will call Update
253  // during the next HWY_DYNAMIC_DISPATCH, and IsInitialized returns false.
254  void DeInit() { StoreMask(1); }
255 
256  // Whether Update was called. This indicates whether any HWY_DYNAMIC_DISPATCH
257  // function was called, which we check in tests.
258  bool IsInitialized() const { return LoadMask() != 1; }
259 
260  // Return the index in the dynamic dispatch table to be used by the current
261  // CPU. Note that this method must be in the header file so it uses the value
262  // of HWY_CHOSEN_TARGET_MASK_TARGETS defined in the translation unit that
263  // calls it, which may be different from others. This means we only enable
264  // those targets that were actually compiled in this module.
265  size_t HWY_INLINE GetIndex() const {
268  }
269 
270  private:
271  // TODO(janwas): remove #if once <atomic> is available
272 #if HWY_ARCH_RVV
273  uint32_t LoadMask() const { return mask_; }
274  void StoreMask(uint32_t mask) { mask_ = mask; }
275 
276  uint32_t mask_{1}; // Initialized to 1 so GetIndex() returns 0.
277 #else
278  uint32_t LoadMask() const { return mask_.load(); }
279  void StoreMask(uint32_t mask) { mask_.store(mask); }
280 
281  std::atomic<uint32_t> mask_{1}; // Initialized to 1 so GetIndex() returns 0.
282 #endif // HWY_ARCH_RVV
283 };
284 
285 // For internal use (e.g. by FunctionCache and DisableTargets).
287 
288 } // namespace hwy
289 
290 #endif // HIGHWAY_HWY_TARGETS_H_
#define HWY_INLINE
Definition: base.h:62
#define HWY_MAYBE_UNUSED
Definition: base.h:73
#define HWY_WASM_EMU256
Definition: detect_targets.h:93
#define HWY_AVX3_DL
Definition: detect_targets.h:62
#define HWY_NEON
Definition: detect_targets.h:82
#define HWY_EMU128
Definition: detect_targets.h:103
#define HWY_PPC8
Definition: detect_targets.h:87
#define HWY_SVE2
Definition: detect_targets.h:79
#define HWY_AVX3
Definition: detect_targets.h:63
#define HWY_AVX2
Definition: detect_targets.h:64
#define HWY_SCALAR
Definition: detect_targets.h:104
#define HWY_SVE_256
Definition: detect_targets.h:78
#define HWY_SVE2_128
Definition: detect_targets.h:77
#define HWY_WASM
Definition: detect_targets.h:94
#define HWY_SVE
Definition: detect_targets.h:80
#define HWY_RVV
Definition: detect_targets.h:99
#define HWY_TARGETS
Definition: detect_targets.h:401
#define HWY_SSE4
Definition: detect_targets.h:66
#define HWY_SSSE3
Definition: detect_targets.h:67
#define HWY_DLLEXPORT
Definition: highway_export.h:13
Definition: aligned_allocator.h:27
HWY_DLLEXPORT void SetSupportedTargetsForTest(uint32_t targets)
HWY_DLLEXPORT ChosenTarget & GetChosenTarget()
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x)
Definition: base.h:674
static HWY_MAYBE_UNUSED const char * TargetName(uint32_t target)
Definition: targets.h:77
HWY_DLLEXPORT uint32_t SupportedTargets()
HWY_INLINE std::vector< uint32_t > SupportedAndGeneratedTargets()
Definition: targets.h:67
HWY_DLLEXPORT void DisableTargets(uint32_t disabled_targets)
Definition: targets.h:242
uint32_t LoadMask() const
Definition: targets.h:278
void StoreMask(uint32_t mask)
Definition: targets.h:279
bool IsInitialized() const
Definition: targets.h:258
size_t HWY_INLINE GetIndex() const
Definition: targets.h:265
void DeInit()
Definition: targets.h:254
void Update(uint32_t targets)
Definition: targets.h:246
std::atomic< uint32_t > mask_
Definition: targets.h:281
#define HWY_CHOSEN_TARGET_MASK_TARGETS
Definition: targets.h:162
#define HWY_CHOSEN_TARGET_SHIFT(X)
Definition: targets.h:156
#define HWY_CHOSEN_TARGET_MASK_SCALAR
Definition: targets.h:152