// Include guard for this header.
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
// The definitions that follow are only provided when __cplusplus reports
// C++17 (201703L) or newer.
// NOTE(review): the matching #endif is not visible in this view.
28 #if __cplusplus >= 201703L
// Expands to the opening of namespace std (with default visibility), the
// library's versioned namespace, namespace experimental, and the inline
// namespace parallelism_v2.
// NOTE(review): the embedded line numbers skip from 36 to 38, so one
// continuation line of this macro appears to be missing here.
35 #define _GLIBCXX_SIMD_BEGIN_NAMESPACE \
36 namespace std _GLIBCXX_VISIBILITY(default) \
38 _GLIBCXX_BEGIN_NAMESPACE_VERSION \
39 namespace experimental { \
40 inline namespace parallelism_v2 {
// Counterpart of _GLIBCXX_SIMD_BEGIN_NAMESPACE.
// NOTE(review): only the _GLIBCXX_END_NAMESPACE_VERSION continuation line is
// visible; the remaining lines of this macro are outside this view.
41 #define _GLIBCXX_SIMD_END_NAMESPACE \
44 _GLIBCXX_END_NAMESPACE_VERSION \
49 #if defined __ARM_NEON
// ARM feature detection: every _GLIBCXX_SIMD_HAVE_* macro in this group is
// defined to 1 or 0 so it can be tested with plain #if.
// NOTE(review): the #else/#endif lines separating the 1 and 0 definitions are
// not visible in this view.
// NEON is reported when the compiler predefines __ARM_NEON.
50 #define _GLIBCXX_SIMD_HAVE_NEON 1
52 #define _GLIBCXX_SIMD_HAVE_NEON 0
// The A32 level additionally requires __ARM_ARCH >= 8 or an __aarch64__
// target.
54 #if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
55 #define _GLIBCXX_SIMD_HAVE_NEON_A32 1
57 #define _GLIBCXX_SIMD_HAVE_NEON_A32 0
// The A64 level requires __ARM_NEON together with __aarch64__.
59 #if defined __ARM_NEON && defined __aarch64__
60 #define _GLIBCXX_SIMD_HAVE_NEON_A64 1
62 #define _GLIBCXX_SIMD_HAVE_NEON_A64 0
// SVE is only reported when __ARM_FEATURE_SVE_BITS is positive and
// __ARM_FEATURE_SVE_VECTOR_OPERATORS equals 1.
64 #if (__ARM_FEATURE_SVE_BITS > 0 && __ARM_FEATURE_SVE_VECTOR_OPERATORS==1)
65 #define _GLIBCXX_SIMD_HAVE_SVE 1
67 #define _GLIBCXX_SIMD_HAVE_SVE 0
// SVE2 follows the compiler's __ARM_FEATURE_SVE2 macro.
69 #ifdef __ARM_FEATURE_SVE2
70 #define _GLIBCXX_SIMD_HAVE_SVE2 1
72 #define _GLIBCXX_SIMD_HAVE_SVE2 0
// x86 feature detection: each _GLIBCXX_SIMD_HAVE_* macro below is defined to
// 1 or 0.
// NOTE(review): for most pairs in this group the selecting #if/#ifdef line and
// the #else/#endif lines are not visible in this view, so the exact condition
// cannot be confirmed from here.
77 #define _GLIBCXX_SIMD_HAVE_MMX 1
79 #define _GLIBCXX_SIMD_HAVE_MMX 0
// SSE is reported when __SSE__ is defined or when targeting x86_64.
81 #if defined __SSE__ || defined __x86_64__
82 #define _GLIBCXX_SIMD_HAVE_SSE 1
84 #define _GLIBCXX_SIMD_HAVE_SSE 0
// SSE2 is reported when __SSE2__ is defined or when targeting x86_64.
86 #if defined __SSE2__ || defined __x86_64__
87 #define _GLIBCXX_SIMD_HAVE_SSE2 1
89 #define _GLIBCXX_SIMD_HAVE_SSE2 0
92 #define _GLIBCXX_SIMD_HAVE_SSE3 1
94 #define _GLIBCXX_SIMD_HAVE_SSE3 0
97 #define _GLIBCXX_SIMD_HAVE_SSSE3 1
99 #define _GLIBCXX_SIMD_HAVE_SSSE3 0
102 #define _GLIBCXX_SIMD_HAVE_SSE4_1 1
104 #define _GLIBCXX_SIMD_HAVE_SSE4_1 0
107 #define _GLIBCXX_SIMD_HAVE_SSE4_2 1
109 #define _GLIBCXX_SIMD_HAVE_SSE4_2 0
112 #define _GLIBCXX_SIMD_HAVE_XOP 1
114 #define _GLIBCXX_SIMD_HAVE_XOP 0
117 #define _GLIBCXX_SIMD_HAVE_AVX 1
119 #define _GLIBCXX_SIMD_HAVE_AVX 0
122 #define _GLIBCXX_SIMD_HAVE_AVX2 1
124 #define _GLIBCXX_SIMD_HAVE_AVX2 0
127 #define _GLIBCXX_SIMD_HAVE_BMI1 1
129 #define _GLIBCXX_SIMD_HAVE_BMI1 0
132 #define _GLIBCXX_SIMD_HAVE_BMI2 1
134 #define _GLIBCXX_SIMD_HAVE_BMI2 0
137 #define _GLIBCXX_SIMD_HAVE_LZCNT 1
139 #define _GLIBCXX_SIMD_HAVE_LZCNT 0
142 #define _GLIBCXX_SIMD_HAVE_SSE4A 1
144 #define _GLIBCXX_SIMD_HAVE_SSE4A 0
147 #define _GLIBCXX_SIMD_HAVE_FMA 1
149 #define _GLIBCXX_SIMD_HAVE_FMA 0
152 #define _GLIBCXX_SIMD_HAVE_FMA4 1
154 #define _GLIBCXX_SIMD_HAVE_FMA4 0
157 #define _GLIBCXX_SIMD_HAVE_F16C 1
159 #define _GLIBCXX_SIMD_HAVE_F16C 0
162 #define _GLIBCXX_SIMD_HAVE_POPCNT 1
164 #define _GLIBCXX_SIMD_HAVE_POPCNT 0
167 #define _GLIBCXX_SIMD_HAVE_AVX512F 1
169 #define _GLIBCXX_SIMD_HAVE_AVX512F 0
172 #define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
174 #define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
177 #define _GLIBCXX_SIMD_HAVE_AVX512VL 1
179 #define _GLIBCXX_SIMD_HAVE_AVX512VL 0
182 #define _GLIBCXX_SIMD_HAVE_AVX512BW 1
184 #define _GLIBCXX_SIMD_HAVE_AVX512BW 0
// The AVX-512 sub-extensions whose conditions are visible below are each
// keyed directly on the compiler macro of the same name.
186 #ifdef __AVX512BITALG__
187 #define _GLIBCXX_SIMD_HAVE_AVX512BITALG 1
189 #define _GLIBCXX_SIMD_HAVE_AVX512BITALG 0
191 #ifdef __AVX512VBMI2__
192 #define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 1
194 #define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 0
196 #ifdef __AVX512VBMI__
197 #define _GLIBCXX_SIMD_HAVE_AVX512VBMI 1
199 #define _GLIBCXX_SIMD_HAVE_AVX512VBMI 0
201 #ifdef __AVX512IFMA__
202 #define _GLIBCXX_SIMD_HAVE_AVX512IFMA 1
204 #define _GLIBCXX_SIMD_HAVE_AVX512IFMA 0
// NOTE(review): the condition selecting the AVX512CD value is not visible.
207 #define _GLIBCXX_SIMD_HAVE_AVX512CD 1
209 #define _GLIBCXX_SIMD_HAVE_AVX512CD 0
211 #ifdef __AVX512VNNI__
212 #define _GLIBCXX_SIMD_HAVE_AVX512VNNI 1
214 #define _GLIBCXX_SIMD_HAVE_AVX512VNNI 0
216 #ifdef __AVX512VPOPCNTDQ__
217 #define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 1
219 #define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 0
221 #ifdef __AVX512VP2INTERSECT__
222 #define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 1
224 #define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 0
// ABI availability macros, derived from the ISA feature macros: each *_ABI
// macro is 1 or 0 depending on the _GLIBCXX_SIMD_HAVE_* macro it is keyed on.
// NOTE(review): the #else/#endif lines are not visible in this view.
// The SSE ABI follows SSE; the "full" SSE ABI follows SSE2.
227 #if _GLIBCXX_SIMD_HAVE_SSE
228 #define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
230 #define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
232 #if _GLIBCXX_SIMD_HAVE_SSE2
233 #define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
235 #define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
// The AVX ABI follows AVX; the "full" AVX ABI follows AVX2.
238 #if _GLIBCXX_SIMD_HAVE_AVX
239 #define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
241 #define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
243 #if _GLIBCXX_SIMD_HAVE_AVX2
244 #define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
246 #define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
// The AVX-512 ABI follows AVX512F; the "full" AVX-512 ABI follows AVX512BW.
249 #if _GLIBCXX_SIMD_HAVE_AVX512F
250 #define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
252 #define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
254 #if _GLIBCXX_SIMD_HAVE_AVX512BW
255 #define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
257 #define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
// Sanity check: stop the build if an x86_64 target ends up without SSE2.
260 #if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
261 #error "Use of SSE2 is required on AMD64"
// Attribute helper macros.
// When _GLIBCXX_CLANG is defined, _GLIBCXX_SIMD_NORMAL_MATH and
// _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA expand to nothing.
265 #ifdef _GLIBCXX_CLANG
266 #define _GLIBCXX_SIMD_NORMAL_MATH
267 #define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
// Alternative definitions: NORMAL_MATH applies the optimize attribute with
// "finite-math-only,no-signed-zeros", and ALWAYS_INLINE_LAMBDA expands to the
// always_inline attribute.
// NOTE(review): presumably this is the #else branch of the Clang check; the
// #else/#endif lines are not visible in this view.
269 #define _GLIBCXX_SIMD_NORMAL_MATH \
270 [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
271 #define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA __attribute__((__always_inline__))
// Function attribute shorthands: noinline, always_inline + artificial +
// inline, and always_inline + inline.
273 #define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
274 #define _GLIBCXX_SIMD_INTRINSIC \
275 [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
276 #define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
// Branch hints: forward __x to __builtin_expect with an expected value of
// 0 (unlikely) or 1 (likely).
277 #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
278 #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
// With SVE, __STRICT_ANSI__, or Clang, _GLIBCXX_SIMD_CONSTEXPR expands to
// nothing and _GLIBCXX_SIMD_USE_CONSTEXPR_API to const; the alternative
// definitions below use constexpr for both.
// NOTE(review): the #else/#endif lines are not visible in this view.
280 #if _GLIBCXX_SIMD_HAVE_SVE || __STRICT_ANSI__ || defined _GLIBCXX_CLANG
281 #define _GLIBCXX_SIMD_CONSTEXPR
282 #define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
284 #define _GLIBCXX_SIMD_CONSTEXPR constexpr
285 #define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
// _GLIBCXX_SIMD_USE_CONSTEXPR is const under Clang; the alternative
// definition is constexpr.
288 #if defined _GLIBCXX_CLANG
289 #define _GLIBCXX_SIMD_USE_CONSTEXPR const
291 #define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
// Operator lists: each _GLIBCXX_SIMD_LIST_* macro invokes the caller-supplied
// function-like macro __macro once per operator token.
// Bitwise operators: |, & and ^.
294 #define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^)
// Shift operators: << and >>.
295 #define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>)
// Arithmetic operators: +, -, *, / and %.
296 #define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) \
297 __macro(+) __macro(-) __macro(*) __macro(/) __macro(%)
// The _GLIBCXX_SIMD_ALL_* variants expand the matching list and append
// static_assert(true), presumably so that an invocation can be terminated
// with a semicolon.
299 #define _GLIBCXX_SIMD_ALL_BINARY(__macro) \
300 _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true)
301 #define _GLIBCXX_SIMD_ALL_SHIFTS(__macro) \
302 _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true)
303 #define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro) \
304 _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true)
// Opt-out switch: when _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, the
// inlining macros are redefined without their attributes.
// _GLIBCXX_SIMD_ALWAYS_INLINE and _GLIBCXX_SIMD_INTRINSIC become plain
// `inline`, and _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA expands to nothing.
// NOTE(review): the closing #endif is not visible in this view.
306 #ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
307 #undef _GLIBCXX_SIMD_ALWAYS_INLINE
308 #define _GLIBCXX_SIMD_ALWAYS_INLINE inline
309 #undef _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
310 #define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
311 #undef _GLIBCXX_SIMD_INTRINSIC
312 #define _GLIBCXX_SIMD_INTRINSIC inline
// _GLIBCXX_SIMD_X86INTRIN is 1 when either SSE or MMX is available and 0
// otherwise.
// NOTE(review): the #else/#endif lines are not visible in this view.
315 #if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX
316 #define _GLIBCXX_SIMD_X86INTRIN 1
318 #define _GLIBCXX_SIMD_X86INTRIN 0
// Configuration switch, set to 1 here.
// NOTE(review): any condition guarding this definition is not visible.
325 #define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1
// Workaround switches. The macro names suggest compiler bug-report (PR)
// numbers and specification issue numbers — TODO confirm against the
// explanatory comments in the full file, which are not visible here.
// Enabled only when x86 intrinsics are in use.
328 #if _GLIBCXX_SIMD_X86INTRIN
329 #define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
// Enabled only when _GLIBCXX_CLANG is not defined.
333 #ifndef _GLIBCXX_CLANG
334 #define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
// Enabled only when x86 intrinsics are in use.
339 #if _GLIBCXX_SIMD_X86INTRIN
340 #define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
// NOTE(review): the condition guarding this definition, if any, is not
// visible.
344 #define _GLIBCXX_SIMD_WORKAROUND_PR90424 1
// Enabled only when x86 intrinsics are in use.
347 #if _GLIBCXX_SIMD_X86INTRIN
348 #define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
// NOTE(review): the conditions guarding the two definitions below, if any,
// are not visible.
353 #define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1
357 #define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1