|
29 | 29 | */ |
30 | 30 | #include <vector> |
31 | 31 |
|
| 32 | +#include <functional> |
32 | 33 | #include <gtest/gtest.h> |
33 | 34 |
|
34 | 35 | #include "arith.h" |
35 | 36 | #include "core.h" |
| 37 | +#include "misc.h" |
36 | 38 | #include "vec_buffers.h" |
37 | 39 |
|
38 | 40 | #ifdef QUADIRON_USE_SIMD |
@@ -98,6 +100,15 @@ class SimdTestFnt : public ::testing::Test { |
98 | 100 | return vec[0]; |
99 | 101 | } |
100 | 102 |
|
| 103 | + void gen_rand_data(std::vector<T>& vec) |
| 104 | + { |
| 105 | + const size_t len = vec.size(); |
| 106 | + |
| 107 | + for (size_t i = 0; i < len; i++) { |
| 108 | + vec[i] = distribution->operator()(quadiron::prng()); |
| 109 | + } |
| 110 | + } |
| 111 | + |
101 | 112 | simd::VecType copy(simd::VecType x) |
102 | 113 | { |
103 | 114 | const size_t n = simd::countof<T>(); |
@@ -222,8 +233,73 @@ class SimdTestFnt : public ::testing::Test { |
222 | 233 | x = simd::load_to_reg(reinterpret_cast<simd::VecType*>(x_buf)); |
223 | 234 | } |
224 | 235 |
|
| 236 | + void core_op_perf_lambda( |
| 237 | + const std::string& text, |
| 238 | + const std::function<void(simd::VecType&, const simd::VecType&)>& f) |
| 239 | + { |
| 240 | + const size_t len = vec_len * simd::countof<T>(); |
| 241 | + |
| 242 | + std::vector<T> buf_x(len); |
| 243 | + std::vector<T> buf_y(len); |
| 244 | + gen_rand_data(buf_x); |
| 245 | + gen_rand_data(buf_y); |
| 246 | + |
| 247 | + simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data()); |
| 248 | + simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data()); |
| 249 | + |
| 250 | + uint64_t start = quadiron::hw_timer(); |
| 251 | + for (unsigned i = 0; i < iters_nb; ++i) { |
| 252 | + for (size_t j = 0; j < vec_len; ++j) { |
| 253 | + simd::VecType x = simd::load_to_reg(&data_x[i]); |
| 254 | + simd::VecType y = simd::load_to_reg(&data_y[i]); |
| 255 | + |
| 256 | + f(x, y); |
| 257 | + |
| 258 | + simd::store_to_mem(&data_x[i], x); |
| 259 | + } |
| 260 | + } |
| 261 | + uint64_t end = quadiron::hw_timer(); |
| 262 | + double avg_cycles_nb = |
| 263 | + static_cast<double>(end - start) / static_cast<double>(iters_nb); |
| 264 | + std::cout << "Average nb of CPU cycles per operation " << text << ": " |
| 265 | + << avg_cycles_nb / vec_len << "\n"; |
| 266 | + } |
| 267 | + |
| 268 | + template <typename TFunc> |
| 269 | + void core_op_perf_template(const std::string& text, const TFunc& f) |
| 270 | + { |
| 271 | + const size_t len = vec_len * simd::countof<T>(); |
| 272 | + |
| 273 | + std::vector<T> buf_x(len); |
| 274 | + std::vector<T> buf_y(len); |
| 275 | + gen_rand_data(buf_x); |
| 276 | + gen_rand_data(buf_y); |
| 277 | + |
| 278 | + simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data()); |
| 279 | + simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data()); |
| 280 | + |
| 281 | + uint64_t start = quadiron::hw_timer(); |
| 282 | + for (unsigned i = 0; i < iters_nb; ++i) { |
| 283 | + for (size_t j = 0; j < vec_len; ++j) { |
| 284 | + simd::VecType x = simd::load_to_reg(&data_x[i]); |
| 285 | + simd::VecType y = simd::load_to_reg(&data_y[i]); |
| 286 | + |
| 287 | + f(x, y); |
| 288 | + |
| 289 | + simd::store_to_mem(&data_x[i], x); |
| 290 | + } |
| 291 | + } |
| 292 | + uint64_t end = quadiron::hw_timer(); |
| 293 | + double avg_cycles_nb = |
| 294 | + static_cast<double>(end - start) / static_cast<double>(iters_nb); |
| 295 | + std::cout << "Average nb of CPU cycles per operation " << text << ": " |
| 296 | + << avg_cycles_nb / vec_len << "\n"; |
| 297 | + } |
| 298 | + |
225 | 299 | T q; |
226 | 300 | std::unique_ptr<std::uniform_int_distribution<uint32_t>> distribution; |
| 301 | + size_t vec_len = 256; |
| 302 | + size_t iters_nb = 1e3; |
227 | 303 | }; |
228 | 304 |
|
229 | 305 | using AllTypes = ::testing::Types<uint16_t, uint32_t>; |
@@ -351,4 +427,199 @@ TYPED_TEST(SimdTestFnt, TestButterflyGs) // NOLINT |
351 | 427 | } |
352 | 428 | } |
353 | 429 |
|
| 430 | +TYPED_TEST(SimdTestFnt, PerfModMulSingle) // NOLINT |
| 431 | +{ |
| 432 | + const size_t iters_nb = 1e5; |
| 433 | + simd::VecType x = this->rand_vec(); |
| 434 | + simd::VecType y = this->rand_vec(); |
| 435 | + |
| 436 | + uint64_t start = quadiron::hw_timer(); |
| 437 | + for (unsigned i = 0; i < iters_nb; ++i) { |
| 438 | + simd::VecType _x = simd::load_to_reg(&x); |
| 439 | + simd::VecType _y = simd::load_to_reg(&y); |
| 440 | + |
| 441 | + _x = simd::mod_mul<TypeParam>(_x, _y); |
| 442 | + |
| 443 | + simd::store_to_mem(&x, _x); |
| 444 | + } |
| 445 | + uint64_t end = quadiron::hw_timer(); |
| 446 | + double avg_cycles_nb = |
| 447 | + static_cast<double>(end - start) / static_cast<double>(iters_nb); |
| 448 | + std::cout << "PerfModMulSingle: " << avg_cycles_nb << "\n"; |
| 449 | +} |
| 450 | + |
| 451 | +TYPED_TEST(SimdTestFnt, PerfModMulBuf) // NOLINT |
| 452 | +{ |
| 453 | + const size_t iters_nb = 1e3; |
| 454 | + |
| 455 | + const size_t len = this->vec_len * simd::countof<TypeParam>(); |
| 456 | + std::vector<TypeParam> buf_x(len); |
| 457 | + std::vector<TypeParam> buf_y(len); |
| 458 | + this->gen_rand_data(buf_x); |
| 459 | + this->gen_rand_data(buf_y); |
| 460 | + |
| 461 | + simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data()); |
| 462 | + simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data()); |
| 463 | + |
| 464 | + uint64_t start = quadiron::hw_timer(); |
| 465 | + for (unsigned i = 0; i < iters_nb; ++i) { |
| 466 | + for (size_t j = 0; j < this->vec_len; ++j) { |
| 467 | + simd::VecType x = simd::load_to_reg(&data_x[i]); |
| 468 | + simd::VecType y = simd::load_to_reg(&data_y[i]); |
| 469 | + |
| 470 | + x = simd::mod_mul<TypeParam>(x, y); |
| 471 | + |
| 472 | + simd::store_to_mem(&data_x[i], x); |
| 473 | + } |
| 474 | + } |
| 475 | + uint64_t end = quadiron::hw_timer(); |
| 476 | + double avg_cycles_nb = |
| 477 | + static_cast<double>(end - start) / static_cast<double>(iters_nb); |
| 478 | + std::cout << "Perf of ModMul on buffer of " |
| 479 | + << len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb |
| 480 | + << " => per operation: " << avg_cycles_nb / this->vec_len << "\n"; |
| 481 | +} |
| 482 | + |
| 483 | +TYPED_TEST(SimdTestFnt, PerfModMulBufLambda) // NOLINT |
| 484 | +{ |
| 485 | + this->core_op_perf_lambda( |
| 486 | + "[Lambda] ModMul", [](simd::VecType& x, const simd::VecType& y) { |
| 487 | + x = simd::mod_mul<TypeParam>(x, y); |
| 488 | + }); |
| 489 | +} |
| 490 | + |
| 491 | +TYPED_TEST(SimdTestFnt, PerfModMulBufTemplate) // NOLINT |
| 492 | +{ |
| 493 | + this->core_op_perf_template( |
| 494 | + "[Template] ModMul", [](simd::VecType& x, const simd::VecType& y) { |
| 495 | + x = simd::mod_mul<TypeParam>(x, y); |
| 496 | + }); |
| 497 | +} |
| 498 | + |
| 499 | +TYPED_TEST(SimdTestFnt, PerfModAddBuf) // NOLINT |
| 500 | +{ |
| 501 | + const size_t iters_nb = 1e3; |
| 502 | + |
| 503 | + const size_t len = this->vec_len * simd::countof<TypeParam>(); |
| 504 | + std::vector<TypeParam> buf_x(len); |
| 505 | + std::vector<TypeParam> buf_y(len); |
| 506 | + this->gen_rand_data(buf_x); |
| 507 | + this->gen_rand_data(buf_y); |
| 508 | + |
| 509 | + simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data()); |
| 510 | + simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data()); |
| 511 | + |
| 512 | + uint64_t start = quadiron::hw_timer(); |
| 513 | + for (unsigned i = 0; i < iters_nb; ++i) { |
| 514 | + for (size_t j = 0; j < this->vec_len; ++j) { |
| 515 | + simd::VecType x = simd::load_to_reg(&data_x[i]); |
| 516 | + simd::VecType y = simd::load_to_reg(&data_y[i]); |
| 517 | + |
| 518 | + x = simd::mod_add<TypeParam>(x, y); |
| 519 | + |
| 520 | + simd::store_to_mem(&data_x[i], x); |
| 521 | + } |
| 522 | + } |
| 523 | + uint64_t end = quadiron::hw_timer(); |
| 524 | + double avg_cycles_nb = |
| 525 | + static_cast<double>(end - start) / static_cast<double>(iters_nb); |
| 526 | + std::cout << "Perf of ModAdd on buffer of " |
| 527 | + << len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb |
| 528 | + << " => per operation: " << avg_cycles_nb / this->vec_len << "\n"; |
| 529 | +} |
| 530 | + |
| 531 | +TYPED_TEST(SimdTestFnt, PerfModSubBuf) // NOLINT |
| 532 | +{ |
| 533 | + const size_t iters_nb = 1e3; |
| 534 | + |
| 535 | + const size_t len = this->vec_len * simd::countof<TypeParam>(); |
| 536 | + std::vector<TypeParam> buf_x(len); |
| 537 | + std::vector<TypeParam> buf_y(len); |
| 538 | + this->gen_rand_data(buf_x); |
| 539 | + this->gen_rand_data(buf_y); |
| 540 | + |
| 541 | + simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data()); |
| 542 | + simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data()); |
| 543 | + |
| 544 | + std::vector<simd::VecType> data_z(this->vec_len); |
| 545 | + |
| 546 | + uint64_t start = quadiron::hw_timer(); |
| 547 | + for (unsigned i = 0; i < iters_nb; ++i) { |
| 548 | + for (size_t j = 0; j < this->vec_len; ++j) { |
| 549 | + simd::VecType x = simd::load_to_reg(&data_x[i]); |
| 550 | + simd::VecType y = simd::load_to_reg(&data_y[i]); |
| 551 | + |
| 552 | + x = simd::mod_sub<TypeParam>(x, y); |
| 553 | + |
| 554 | + simd::store_to_mem(&data_x[i], x); |
| 555 | + } |
| 556 | + } |
| 557 | + uint64_t end = quadiron::hw_timer(); |
| 558 | + double avg_cycles_nb = |
| 559 | + static_cast<double>(end - start) / static_cast<double>(iters_nb); |
| 560 | + std::cout << "Perf of ModSub on buffer of " |
| 561 | + << len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb |
| 562 | + << " => per operation: " << avg_cycles_nb / this->vec_len << "\n"; |
| 563 | +} |
| 564 | + |
| 565 | +TYPED_TEST(SimdTestFnt, PerfButterflyCt) // NOLINT |
| 566 | +{ |
| 567 | + const size_t iters_nb = 1e3; |
| 568 | + |
| 569 | + const size_t len = this->vec_len * simd::countof<TypeParam>(); |
| 570 | + std::vector<TypeParam> buf_x(len); |
| 571 | + std::vector<TypeParam> buf_y(len); |
| 572 | + this->gen_rand_data(buf_x); |
| 573 | + this->gen_rand_data(buf_y); |
| 574 | + |
| 575 | + simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data()); |
| 576 | + simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data()); |
| 577 | + |
| 578 | + std::vector<simd::VecType> data_z(this->vec_len); |
| 579 | + |
| 580 | + TypeParam coef = |
| 581 | + 1 + this->distribution->operator()(quadiron::prng()) % (this->q - 2); |
| 582 | + const simd::CtGsCase ct_case = simd::get_case<TypeParam>(coef, this->q); |
| 583 | + simd::VecType c = simd::set_one(coef); |
| 584 | + |
| 585 | + uint64_t start = quadiron::hw_timer(); |
| 586 | + for (unsigned i = 0; i < iters_nb; ++i) { |
| 587 | + for (size_t j = 0; j < this->vec_len; ++j) { |
| 588 | + simd::VecType x = simd::load_to_reg(&data_x[i]); |
| 589 | + simd::VecType y = simd::load_to_reg(&data_y[i]); |
| 590 | + |
| 591 | + simd::butterfly_ct<TypeParam>(ct_case, c, x, y); |
| 592 | + |
| 593 | + simd::store_to_mem(&data_x[i], x); |
| 594 | + simd::store_to_mem(&data_y[i], y); |
| 595 | + } |
| 596 | + } |
| 597 | + uint64_t end = quadiron::hw_timer(); |
| 598 | + double avg_cycles_nb = |
| 599 | + static_cast<double>(end - start) / static_cast<double>(iters_nb); |
| 600 | + std::cout << "Perf of Butterfly_CT on buffer of " |
| 601 | + << len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb |
| 602 | + << " => per operation: " << avg_cycles_nb / this->vec_len << "\n"; |
| 603 | + |
| 604 | + start = quadiron::hw_timer(); |
| 605 | + for (unsigned i = 0; i < iters_nb; ++i) { |
| 606 | + for (size_t j = 0; j < this->vec_len; ++j) { |
| 607 | + simd::VecType x = simd::load_to_reg(&data_x[i]); |
| 608 | + simd::VecType y = simd::load_to_reg(&data_y[i]); |
| 609 | + |
| 610 | + simd::VecType z = simd::mod_mul<TypeParam>(c, y); |
| 611 | + y = simd::mod_sub<TypeParam>(x, z); |
| 612 | + x = simd::mod_add<TypeParam>(x, z); |
| 613 | + |
| 614 | + simd::store_to_mem(&data_x[i], x); |
| 615 | + simd::store_to_mem(&data_y[i], y); |
| 616 | + } |
| 617 | + } |
| 618 | + end = quadiron::hw_timer(); |
| 619 | + avg_cycles_nb = |
| 620 | + static_cast<double>(end - start) / static_cast<double>(iters_nb); |
| 621 | + std::cout << "Perf of MANUAL Butterfly_CT on buffer of " |
| 622 | + << len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb |
| 623 | + << " => per operation: " << avg_cycles_nb / this->vec_len << "\n"; |
| 624 | +} |
354 | 625 | #endif |
0 commit comments