Skip to content

Commit 9a9f7c4

Browse files
committed
[PERF][SIMD] add test to measure core operations
1 parent b923e4e commit 9a9f7c4

File tree

1 file changed

+271
-0
lines changed

1 file changed

+271
-0
lines changed

test/simd/test_simd_fnt.cpp

Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,12 @@
2929
*/
3030
#include <vector>
3131

32+
#include <functional>
3233
#include <gtest/gtest.h>
3334

3435
#include "arith.h"
3536
#include "core.h"
37+
#include "misc.h"
3638
#include "vec_buffers.h"
3739

3840
#ifdef QUADIRON_USE_SIMD
@@ -98,6 +100,15 @@ class SimdTestFnt : public ::testing::Test {
98100
return vec[0];
99101
}
100102

103+
void gen_rand_data(std::vector<T>& vec)
104+
{
105+
const size_t len = vec.size();
106+
107+
for (size_t i = 0; i < len; i++) {
108+
vec[i] = distribution->operator()(quadiron::prng());
109+
}
110+
}
111+
101112
simd::VecType copy(simd::VecType x)
102113
{
103114
const size_t n = simd::countof<T>();
@@ -222,8 +233,73 @@ class SimdTestFnt : public ::testing::Test {
222233
x = simd::load_to_reg(reinterpret_cast<simd::VecType*>(x_buf));
223234
}
224235

236+
void core_op_perf_lambda(
237+
const std::string& text,
238+
const std::function<void(simd::VecType&, const simd::VecType&)>& f)
239+
{
240+
const size_t len = vec_len * simd::countof<T>();
241+
242+
std::vector<T> buf_x(len);
243+
std::vector<T> buf_y(len);
244+
gen_rand_data(buf_x);
245+
gen_rand_data(buf_y);
246+
247+
simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
248+
simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());
249+
250+
uint64_t start = quadiron::hw_timer();
251+
for (unsigned i = 0; i < iters_nb; ++i) {
252+
for (size_t j = 0; j < vec_len; ++j) {
253+
simd::VecType x = simd::load_to_reg(&data_x[i]);
254+
simd::VecType y = simd::load_to_reg(&data_y[i]);
255+
256+
f(x, y);
257+
258+
simd::store_to_mem(&data_x[i], x);
259+
}
260+
}
261+
uint64_t end = quadiron::hw_timer();
262+
double avg_cycles_nb =
263+
static_cast<double>(end - start) / static_cast<double>(iters_nb);
264+
std::cout << "Average nb of CPU cycles per operation " << text << ": "
265+
<< avg_cycles_nb / vec_len << "\n";
266+
}
267+
268+
template <typename TFunc>
269+
void core_op_perf_template(const std::string& text, const TFunc& f)
270+
{
271+
const size_t len = vec_len * simd::countof<T>();
272+
273+
std::vector<T> buf_x(len);
274+
std::vector<T> buf_y(len);
275+
gen_rand_data(buf_x);
276+
gen_rand_data(buf_y);
277+
278+
simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
279+
simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());
280+
281+
uint64_t start = quadiron::hw_timer();
282+
for (unsigned i = 0; i < iters_nb; ++i) {
283+
for (size_t j = 0; j < vec_len; ++j) {
284+
simd::VecType x = simd::load_to_reg(&data_x[i]);
285+
simd::VecType y = simd::load_to_reg(&data_y[i]);
286+
287+
f(x, y);
288+
289+
simd::store_to_mem(&data_x[i], x);
290+
}
291+
}
292+
uint64_t end = quadiron::hw_timer();
293+
double avg_cycles_nb =
294+
static_cast<double>(end - start) / static_cast<double>(iters_nb);
295+
std::cout << "Average nb of CPU cycles per operation " << text << ": "
296+
<< avg_cycles_nb / vec_len << "\n";
297+
}
298+
225299
T q;
226300
std::unique_ptr<std::uniform_int_distribution<uint32_t>> distribution;
301+
size_t vec_len = 256;
302+
size_t iters_nb = 1e3;
227303
};
228304

229305
using AllTypes = ::testing::Types<uint16_t, uint32_t>;
@@ -351,4 +427,199 @@ TYPED_TEST(SimdTestFnt, TestButterflyGs) // NOLINT
351427
}
352428
}
353429

430+
// Times simd::mod_mul applied repeatedly to a single pair of vectors: each
// round reloads both operands, multiplies them and stores the product back
// into `x`. Prints the hw_timer delta averaged over the number of rounds.
TYPED_TEST(SimdTestFnt, PerfModMulSingle) // NOLINT
{
    const size_t rounds = 1e5;
    simd::VecType x = this->rand_vec();
    simd::VecType y = this->rand_vec();

    const uint64_t t_begin = quadiron::hw_timer();
    for (size_t round = 0; round < rounds; ++round) {
        simd::VecType reg_x = simd::load_to_reg(&x);
        simd::VecType reg_y = simd::load_to_reg(&y);

        reg_x = simd::mod_mul<TypeParam>(reg_x, reg_y);

        simd::store_to_mem(&x, reg_x);
    }
    const uint64_t t_end = quadiron::hw_timer();

    const double elapsed = static_cast<double>(t_end - t_begin);
    const double avg_cycles_nb = elapsed / static_cast<double>(rounds);
    std::cout << "PerfModMulSingle: " << avg_cycles_nb << "\n";
}
450+
451+
// Times simd::mod_mul over two random buffers of `vec_len` SIMD vectors.
// Every pass multiplies each vector of buf_x by the matching vector of buf_y
// and stores the product back into buf_x. Prints the hw_timer delta averaged
// per pass and per vector.
TYPED_TEST(SimdTestFnt, PerfModMulBuf) // NOLINT
{
    const size_t iters_nb = 1e3;

    const size_t len = this->vec_len * simd::countof<TypeParam>();
    std::vector<TypeParam> buf_x(len);
    std::vector<TypeParam> buf_y(len);
    this->gen_rand_data(buf_x);
    this->gen_rand_data(buf_y);

    // NOTE(review): plain std::vector storage is viewed as SIMD vectors;
    // confirm load_to_reg/store_to_mem accept its alignment.
    simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
    simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());

    const uint64_t start = quadiron::hw_timer();
    for (size_t iter = 0; iter < iters_nb; ++iter) {
        for (size_t j = 0; j < this->vec_len; ++j) {
            // Index with the inner counter: the buffers hold only `vec_len`
            // vectors, so the pass counter must not be used as an index.
            simd::VecType x = simd::load_to_reg(&data_x[j]);
            simd::VecType y = simd::load_to_reg(&data_y[j]);

            x = simd::mod_mul<TypeParam>(x, y);

            simd::store_to_mem(&data_x[j], x);
        }
    }
    const uint64_t end = quadiron::hw_timer();

    const double avg_cycles_nb =
        static_cast<double>(end - start) / static_cast<double>(iters_nb);
    std::cout << "Perf of ModMul on buffer of "
              << len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb
              << " => per operation: " << avg_cycles_nb / this->vec_len << "\n";
}
482+
483+
// Runs the std::function-based perf harness with simd::mod_mul as the
// operation under test.
TYPED_TEST(SimdTestFnt, PerfModMulBufLambda) // NOLINT
{
    const auto mul_in_place = [](simd::VecType& x, const simd::VecType& y) {
        x = simd::mod_mul<TypeParam>(x, y);
    };

    this->core_op_perf_lambda("[Lambda] ModMul", mul_in_place);
}
490+
491+
// Runs the template-based perf harness with simd::mod_mul as the operation
// under test.
TYPED_TEST(SimdTestFnt, PerfModMulBufTemplate) // NOLINT
{
    const auto mul_in_place = [](simd::VecType& x, const simd::VecType& y) {
        x = simd::mod_mul<TypeParam>(x, y);
    };

    this->core_op_perf_template("[Template] ModMul", mul_in_place);
}
498+
499+
// Times simd::mod_add over two random buffers of `vec_len` SIMD vectors.
// Every pass adds each vector of buf_y to the matching vector of buf_x and
// stores the sum back into buf_x. Prints the hw_timer delta averaged per
// pass and per vector.
TYPED_TEST(SimdTestFnt, PerfModAddBuf) // NOLINT
{
    const size_t iters_nb = 1e3;

    const size_t len = this->vec_len * simd::countof<TypeParam>();
    std::vector<TypeParam> buf_x(len);
    std::vector<TypeParam> buf_y(len);
    this->gen_rand_data(buf_x);
    this->gen_rand_data(buf_y);

    // NOTE(review): plain std::vector storage is viewed as SIMD vectors;
    // confirm load_to_reg/store_to_mem accept its alignment.
    simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
    simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());

    const uint64_t start = quadiron::hw_timer();
    for (size_t iter = 0; iter < iters_nb; ++iter) {
        for (size_t j = 0; j < this->vec_len; ++j) {
            // Index with the inner counter: the buffers hold only `vec_len`
            // vectors, so the pass counter must not be used as an index.
            simd::VecType x = simd::load_to_reg(&data_x[j]);
            simd::VecType y = simd::load_to_reg(&data_y[j]);

            x = simd::mod_add<TypeParam>(x, y);

            simd::store_to_mem(&data_x[j], x);
        }
    }
    const uint64_t end = quadiron::hw_timer();

    const double avg_cycles_nb =
        static_cast<double>(end - start) / static_cast<double>(iters_nb);
    std::cout << "Perf of ModAdd on buffer of "
              << len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb
              << " => per operation: " << avg_cycles_nb / this->vec_len << "\n";
}
530+
531+
// Times simd::mod_sub over two random buffers of `vec_len` SIMD vectors.
// Every pass subtracts each vector of buf_y from the matching vector of
// buf_x and stores the difference back into buf_x. Prints the hw_timer
// delta averaged per pass and per vector.
TYPED_TEST(SimdTestFnt, PerfModSubBuf) // NOLINT
{
    const size_t iters_nb = 1e3;

    const size_t len = this->vec_len * simd::countof<TypeParam>();
    std::vector<TypeParam> buf_x(len);
    std::vector<TypeParam> buf_y(len);
    this->gen_rand_data(buf_x);
    this->gen_rand_data(buf_y);

    // NOTE(review): plain std::vector storage is viewed as SIMD vectors;
    // confirm load_to_reg/store_to_mem accept its alignment.
    simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
    simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());

    const uint64_t start = quadiron::hw_timer();
    for (size_t iter = 0; iter < iters_nb; ++iter) {
        for (size_t j = 0; j < this->vec_len; ++j) {
            // Index with the inner counter: the buffers hold only `vec_len`
            // vectors, so the pass counter must not be used as an index.
            simd::VecType x = simd::load_to_reg(&data_x[j]);
            simd::VecType y = simd::load_to_reg(&data_y[j]);

            x = simd::mod_sub<TypeParam>(x, y);

            simd::store_to_mem(&data_x[j], x);
        }
    }
    const uint64_t end = quadiron::hw_timer();

    const double avg_cycles_nb =
        static_cast<double>(end - start) / static_cast<double>(iters_nb);
    std::cout << "Perf of ModSub on buffer of "
              << len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb
              << " => per operation: " << avg_cycles_nb / this->vec_len << "\n";
}
564+
565+
// Compares two ways of running a butterfly step over two random buffers of
// `vec_len` SIMD vectors, using one random coefficient drawn from
// [1, q - 2]:
//   1. simd::butterfly_ct with the case selected by simd::get_case;
//   2. a "manual" sequence: z = c * y, y = x - z, x = x + z.
// Both results are stored back into the buffers, so the second measurement
// runs on the data left by the first. For each variant, prints the hw_timer
// delta averaged per pass and per vector.
TYPED_TEST(SimdTestFnt, PerfButterflyCt) // NOLINT
{
    const size_t iters_nb = 1e3;

    const size_t len = this->vec_len * simd::countof<TypeParam>();
    std::vector<TypeParam> buf_x(len);
    std::vector<TypeParam> buf_y(len);
    this->gen_rand_data(buf_x);
    this->gen_rand_data(buf_y);

    // NOTE(review): plain std::vector storage is viewed as SIMD vectors;
    // confirm load_to_reg/store_to_mem accept its alignment.
    simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
    simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());

    TypeParam coef =
        1 + this->distribution->operator()(quadiron::prng()) % (this->q - 2);
    const simd::CtGsCase ct_case = simd::get_case<TypeParam>(coef, this->q);
    simd::VecType c = simd::set_one(coef);

    uint64_t start = quadiron::hw_timer();
    for (size_t iter = 0; iter < iters_nb; ++iter) {
        for (size_t j = 0; j < this->vec_len; ++j) {
            // Index with the inner counter: the buffers hold only `vec_len`
            // vectors, so the pass counter must not be used as an index.
            simd::VecType x = simd::load_to_reg(&data_x[j]);
            simd::VecType y = simd::load_to_reg(&data_y[j]);

            simd::butterfly_ct<TypeParam>(ct_case, c, x, y);

            simd::store_to_mem(&data_x[j], x);
            simd::store_to_mem(&data_y[j], y);
        }
    }
    uint64_t end = quadiron::hw_timer();
    double avg_cycles_nb =
        static_cast<double>(end - start) / static_cast<double>(iters_nb);
    std::cout << "Perf of Butterfly_CT on buffer of "
              << len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb
              << " => per operation: " << avg_cycles_nb / this->vec_len << "\n";

    start = quadiron::hw_timer();
    for (size_t iter = 0; iter < iters_nb; ++iter) {
        for (size_t j = 0; j < this->vec_len; ++j) {
            simd::VecType x = simd::load_to_reg(&data_x[j]);
            simd::VecType y = simd::load_to_reg(&data_y[j]);

            // Manual butterfly: y must be computed before x is overwritten,
            // since both depend on the original x.
            simd::VecType z = simd::mod_mul<TypeParam>(c, y);
            y = simd::mod_sub<TypeParam>(x, z);
            x = simd::mod_add<TypeParam>(x, z);

            simd::store_to_mem(&data_x[j], x);
            simd::store_to_mem(&data_y[j], y);
        }
    }
    end = quadiron::hw_timer();
    avg_cycles_nb =
        static_cast<double>(end - start) / static_cast<double>(iters_nb);
    std::cout << "Perf of MANUAL Butterfly_CT on buffer of "
              << len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb
              << " => per operation: " << avg_cycles_nb / this->vec_len << "\n";
}
354625
#endif

0 commit comments

Comments
 (0)