-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathavx2-strstr-v2-clang-specific.cpp
42 lines (34 loc) · 1.38 KB
/
avx2-strstr-v2-clang-specific.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
/*
The following templates implement the loop, where K is a template parameter.
for (unsigned i=1; i < K; i++) {
const __m256i substring = _mm256_alignr_epi8(next1, curr, i);
eq = _mm256_and_si256(eq, _mm256_cmpeq_epi8(substring, broadcasted[i]));
}
Clang rejects this loop because `i` is a runtime variable, while the last
argument of _mm256_alignr_epi8 has to be a compile-time constant. GCC somehow
deals with it.
*/
#ifdef __clang__
// Compile-time unrolled body of the loop over positions 0 .. K-1.
//
// Template parameters:
//   K         - number of entries in `broadcasted`.
//   i         - position handled by the current step; it is a template
//               parameter so that it reaches _mm256_alignr_epi8 as a constant.
//   terminate - selects the specialization: `false` performs one step and
//               recurses with i + 1, `true` ends the recursion.
template <size_t K, int i, bool terminate>
struct inner_loop_aux;

// One step of the unrolled loop.
//
// ANDs into `eq` the byte-wise equality of _mm256_alignr_epi8(next1, curr, i)
// and broadcasted[i], then hands over to the step for position i + 1.
//
// NOTE(review): _mm256_alignr_epi8 shifts each 128-bit lane separately, so
// positions above 16 would pull in zero bytes; presumably callers keep K small
// enough -- confirm against the call sites.
template <size_t K, int i>
struct inner_loop_aux<K, i, false> {
    void operator()(__m256i& eq, const __m256i& next1, const __m256i& curr, const __m256i (&broadcasted)[K]) const {
        // A position outside [0, K) would index past `broadcasted` and never
        // reach the terminating specialization; reject it with a clear message.
        static_assert(i >= 0 && static_cast<size_t>(i) < K, "position out of range");

        const __m256i substring = _mm256_alignr_epi8(next1, curr, i);
        eq = _mm256_and_si256(eq, _mm256_cmpeq_epi8(substring, broadcasted[i]));

        // The recursion ends as soon as the next position equals K.
        inner_loop_aux<K, i + 1, (static_cast<size_t>(i) + 1 == K)>()(eq, next1, curr, broadcasted);
    }
};

// End of the recursion: leaves every argument untouched.
template <size_t K, int i>
struct inner_loop_aux<K, i, true> {
    void operator()(__m256i&, const __m256i&, const __m256i&, const __m256i (&)[K]) const {
        // nop
    }
};
// Entry point of the compile-time unrolled loop.
//
// Runs the inner_loop_aux steps for positions 0 .. K-1; each step ANDs into
// `eq` the comparison of _mm256_alignr_epi8(next1, curr, position) with
// broadcasted[position].
//
// NOTE(review): the reference loop quoted in the comment at the top of this
// section starts at position 1, whereas this starts at 0. Presumably callers
// seed `eq` with the position-0 comparison, which would make the extra step
// redundant but harmless -- confirm against the call sites before changing it.
//
// The call operator is const because the functor carries no state.
template <size_t K>
struct inner_loop {
    void operator()(__m256i& eq, const __m256i& next1, const __m256i& curr, const __m256i (&broadcasted)[K]) const {
        static_assert(K > 0, "wrong value");
        // With K > 0, position 0 is never the end, hence terminate = false.
        inner_loop_aux<K, 0, false>()(eq, next1, curr, broadcasted);
    }
};
#endif