-
Notifications
You must be signed in to change notification settings - Fork 12
Description
// sse2 version
/// @brief Tests whether two buffers of @p n elements hold identical bytes.
///
/// The comparison is done on the raw object representation, i.e. on
/// `n * sizeof(Char)` bytes. When the compiler targets SSE2 (`__SSE2__` is
/// predefined) the bulk of the data is compared 16 bytes at a time with
/// unaligned loads; whatever does not fill a whole vector is compared byte by
/// byte.
///
/// @tparam Char element type of the buffers.
/// @param a  first buffer; must be readable for @p n elements.
/// @param b  second buffer; must be readable for @p n elements.
/// @param n  number of elements (not bytes) to compare; 0 yields `true`.
/// @return `true` if every byte matches, `false` at the first mismatch.
template <typename Char>
inline bool bytescompare(const Char* a, const Char* b, size_t n)
{
    // Index in bytes rather than elements: each vector load covers 16 *bytes*,
    // so an element-based stride would skip data whenever sizeof(Char) > 1.
    const unsigned char* lhs = reinterpret_cast<const unsigned char*>(a);
    const unsigned char* rhs = reinterpret_cast<const unsigned char*>(b);
    const size_t total_bytes = n * sizeof(Char);
    size_t offset = 0;

#ifdef __SSE2__
    constexpr size_t kVectorBytes = 16;
    const size_t vector_end = total_bytes - total_bytes % kVectorBytes;
    for (; offset < vector_end; offset += kVectorBytes)
    {
        const __m128i vec_1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(lhs + offset));
        const __m128i vec_2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(rhs + offset));
        // cmpeq sets a byte lane to 0xFF when equal; movemask collects the top
        // bit of each of the 16 lanes, so 0xFFFF means "all 16 bytes equal".
        const int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(vec_1, vec_2));
        if (mask != 0xFFFF)
        {
            return false;
        }
    }
#endif

    // Scalar tail (or the whole range when SSE2 is not available).
    for (; offset < total_bytes; ++offset)
    {
        if (lhs[offset] != rhs[offset])
        {
            return false;
        }
    }
    return true;
}
// avx version
/// @brief Tests whether two buffers of @p n elements hold identical bytes,
///        using 256-bit AVX2 loads when available.
///
/// The comparison is done on the raw object representation, i.e. on
/// `n * sizeof(Char)` bytes. When the compiler targets AVX2 (`__AVX2__` is
/// predefined, e.g. with `-mavx2`) the data is compared 32 bytes at a time
/// with unaligned loads; the remainder, or everything when AVX2 is not
/// enabled, is compared byte by byte.
///
/// @tparam Char element type of the buffers.
/// @param a  first buffer; must be readable for @p n elements.
/// @param b  second buffer; must be readable for @p n elements.
/// @param n  number of elements (not bytes) to compare; 0 yields `true`.
/// @return `true` if every byte matches, `false` at the first mismatch.
template <typename Char>
inline bool bytescompare_avx(const Char* a, const Char* b, size_t n)
{
    // Index in bytes rather than elements: each vector load covers 32 *bytes*,
    // so an element-based stride would skip data whenever sizeof(Char) > 1.
    const unsigned char* lhs = reinterpret_cast<const unsigned char*>(a);
    const unsigned char* rhs = reinterpret_cast<const unsigned char*>(b);
    const size_t total_bytes = n * sizeof(Char);
    size_t offset = 0;

#ifdef __AVX2__
    constexpr size_t kVectorBytes = 32;
    const size_t vector_end = total_bytes - total_bytes % kVectorBytes;
    for (; offset < vector_end; offset += kVectorBytes)
    {
        const __m256i vec_1 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(lhs + offset));
        const __m256i vec_2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(rhs + offset));
        // movemask packs the top bit of each of the 32 byte lanes into an int;
        // compare as unsigned so "all 32 lanes equal" is an explicit
        // 0xFFFFFFFF instead of relying on an implicit signed/unsigned mix.
        const int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(vec_1, vec_2));
        if (static_cast<unsigned int>(mask) != 0xFFFFFFFFu)
        {
            return false;
        }
    }
#endif

    // Scalar tail (or the whole range when AVX2 is not available).
    for (; offset < total_bytes; ++offset)
    {
        if (lhs[offset] != rhs[offset])
        {
            return false;
        }
    }
    return true;
}
其中测试用例如下:
/// Micro-benchmark for NBSimdBooster::bytescompare_avx on two identical
/// 320000-byte buffers.
///
/// Prints the best wall-clock time of a single call (in seconds) followed by
/// the comparison result.
void test_for_bytescompare()
{
    std::srand(static_cast<unsigned>(std::time(nullptr))); // seed the random number generator
    const size_t arraySize = 320000;                       // buffer size in bytes

    // Two buffers with identical random contents. Static storage keeps
    // 640 KB off the call stack.
    static char array1[arraySize];
    static char array2[arraySize];
    generateRandomByteArray(array1, arraySize);
    std::copy(array1, array1 + arraySize, array2);

    // Untimed warm-up call.
    // NOTE(review): a single cold call also pays one-time costs (cache misses,
    // presumably a wide-vector warm-up on some CPUs), which is likely why one
    // timing per binary made the AVX version look slower — confirm on the
    // target machine.
    bool res = NBSimdBooster::bytescompare_avx(array1, array2, arraySize);

    // Time several runs individually and keep the fastest one; the minimum is
    // the figure least disturbed by scheduling noise.
    const int runCount = 100;
    std::chrono::duration<double> best = std::chrono::duration<double>::max();
    for (int run = 0; run < runCount; ++run)
    {
        // steady_clock is monotonic, which is what interval timing needs.
        const auto start = std::chrono::steady_clock::now();
        res = NBSimdBooster::bytescompare_avx(array1, array2, arraySize);
        const auto end = std::chrono::steady_clock::now();

        const std::chrono::duration<double> elapsed_seconds = end - start;
        if (elapsed_seconds < best)
        {
            best = elapsed_seconds;
        }
    }

    std::cout << "time: " << best.count() << std::endl;
    std::cout << res << std::endl;
}
其中,编译命令为:g++ -O2 test.cpp -o pj1 -mavx2
sse版本耗时:1.4581e-05s
avx版本耗时:2.7021e-05s