Skip to content

小彭老师,我想请教一下下面这段代码为什么在-O2的情况下,avx2的版本比sse2的版本性能要差呢?(x86 gcc version 11.2.1) #9

@AJ-mider

Description

@AJ-mider

// SSE2 version
/// @brief Checks whether the first @p n elements of @p a and @p b are equal.
///
/// When the compiler targets SSE2 (`__SSE2__` is predefined) and `Char` is a
/// one-byte type, the bulk of the range is compared 16 bytes at a time with
/// unaligned loads; whatever is left is compared element by element.
///
/// @tparam Char element type of both buffers.
/// @param a pointer to the first buffer; at least @p n elements are read.
/// @param b pointer to the second buffer; at least @p n elements are read.
/// @param n number of elements to compare.
/// @return true if every compared element pair is equal (also when n == 0),
///         false at the first mismatch found.
template <typename Char>
inline bool bytescompare(const Char* a, const Char* b, size_t n)
{
    size_t offset = 0;
#ifdef __SSE2__
    // The vector loop advances 16 *elements* per 16-byte load, which only
    // covers every byte when one element is exactly one byte wide. Wider
    // element types fall through to the scalar loop below.
    if (sizeof(Char) == 1)
    {
        const size_t offset_end = n / 16 * 16;
        for (; offset < offset_end; offset += 16)
        {
            const __m128i vec_1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a + offset));
            const __m128i vec_2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + offset));
            // Each equal byte pair contributes one set bit to the mask;
            // 16 equal bytes therefore yield 0xFFFF.
            const int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(vec_1, vec_2));
            if (mask != 0xFFFF) return false;
        }
    }
#endif
    // Scalar tail (or the whole range when the SIMD path is unavailable).
    for (; offset < n; ++offset)
    {
        if (a[offset] != b[offset]) return false;
    }
    return true;
}

// AVX2 version
/// @brief Checks whether the first @p n elements of @p a and @p b are equal.
///
/// When the compiler targets AVX2 (`__AVX2__` is predefined, e.g. with
/// `-mavx2`) and `Char` is a one-byte type, the bulk of the range is compared
/// 32 bytes at a time with unaligned loads; whatever is left is compared
/// element by element.
///
/// @tparam Char element type of both buffers.
/// @param a pointer to the first buffer; at least @p n elements are read.
/// @param b pointer to the second buffer; at least @p n elements are read.
/// @param n number of elements to compare.
/// @return true if every compared element pair is equal (also when n == 0),
///         false at the first mismatch found.
template <typename Char>
inline bool bytescompare_avx(const Char* a, const Char* b, size_t n)
{
    size_t offset = 0;
#ifdef __AVX2__
    // The vector loop advances 32 *elements* per 32-byte load, which only
    // covers every byte when one element is exactly one byte wide. Wider
    // element types fall through to the scalar loop below.
    if (sizeof(Char) == 1)
    {
        const size_t offset_end = n / 32 * 32;
        for (; offset < offset_end; offset += 32)
        {
            const __m256i vec_1 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(a + offset));
            const __m256i vec_2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(b + offset));
            // Each equal byte pair contributes one set bit; compare as
            // unsigned so "all 32 bits set" is spelled without relying on
            // an implicit signed-to-unsigned conversion.
            const unsigned mask =
                static_cast<unsigned>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(vec_1, vec_2)));
            if (mask != 0xFFFFFFFFu) return false;
        }
    }
#endif
    // Scalar tail (or the whole range when the SIMD path is unavailable).
    for (; offset < n; ++offset)
    {
        if (a[offset] != b[offset]) return false;
    }
    return true;
}

其中测试用例如下:
/// Times a single call of NBSimdBooster::bytescompare_avx on two identical
/// buffers and prints the elapsed time in seconds followed by the comparison
/// result.
void test_for_bytescompare()
{
    std::srand(static_cast<unsigned>(std::time(nullptr))); // seed the random number generator
    const size_t arraySize = 320000; // buffer size in bytes
    // Fill one buffer via generateRandomByteArray and duplicate it, so the
    // comparison has to scan the whole range.
    char array1[arraySize];
    char array2[arraySize];
    generateRandomByteArray(array1, arraySize);
    std::copy(array1, array1 + arraySize, array2);
    // NOTE(review): this is one un-warmed measurement in the tens of
    // microseconds, so run-to-run noise is probably comparable to the
    // difference being measured — consider repeating and averaging.
    const auto start = std::chrono::high_resolution_clock::now();
    const bool res = NBSimdBooster::bytescompare_avx(array1, array2, arraySize);
    const auto end = std::chrono::high_resolution_clock::now();
    const std::chrono::duration<double> elapsed_seconds = end - start;
    std::cout << "time: " << elapsed_seconds.count() << std::endl;
    std::cout << res << std::endl;
}
其中
编译命令:g++ -O2 test.cpp -o pj1 -mavx2
sse版本耗时:1.4581e-05s
avx版本耗时:2.7021e-05s

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions