@@ -31507,13 +31507,113 @@ result_t test_vmlal_lane_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
31507
31507
#endif // ENABLE_TEST_ALL
31508
31508
}
31509
31509
31510
- result_t test_vmlal_high_lane_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
31510
+ result_t test_vmlal_high_lane_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
31511
+ #ifdef ENABLE_TEST_ALL
31512
+ const int32_t *_a = (int32_t *)impl.test_cases_int_pointer1;
31513
+ const int16_t *_b = (int16_t *)impl.test_cases_int_pointer2;
31514
+ const int16_t *_c = (int16_t *)impl.test_cases_int_pointer3;
31515
+ int32_t _d[8];
31516
+ int32x4_t a = vld1q_s32(_a);
31517
+ int16x8_t b = vld1q_s16(_b);
31518
+ int16x4_t c = vld1_s16(_c);
31519
+ int32x4_t d;
31520
+
31521
+ #define TEST_IMPL(IDX) \
31522
+ for (int i = 0; i < 4; i++) { \
31523
+ _d[i] = _a[i] + (int32_t)_b[i + 4] * (int32_t)_c[IDX]; \
31524
+ } \
31525
+ d = vmlal_high_lane_s16(a, b, c, IDX); \
31526
+ CHECK_RESULT(validate_int32(d, _d[0], _d[1], _d[2], _d[3]))
31527
+
31528
+ IMM_4_ITER
31529
+ #undef TEST_IMPL
31530
+
31531
+ return TEST_SUCCESS;
31532
+ #else
31533
+ return TEST_UNIMPL;
31534
+ #endif // ENABLE_TEST_ALL
31535
+ }
31511
31536
31512
- result_t test_vmlal_high_lane_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
31537
+ result_t test_vmlal_high_lane_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
31538
+ #ifdef ENABLE_TEST_ALL
31539
+ const int64_t *_a = (int64_t *)impl.test_cases_int_pointer1;
31540
+ const int32_t *_b = (int32_t *)impl.test_cases_int_pointer2;
31541
+ const int32_t *_c = (int32_t *)impl.test_cases_int_pointer3;
31542
+ int64_t _d[4];
31543
+ int64x2_t a = vld1q_s64(_a);
31544
+ int32x4_t b = vld1q_s32(_b);
31545
+ int32x2_t c = vld1_s32(_c);
31546
+ int64x2_t d;
31547
+
31548
+ #define TEST_IMPL(IDX) \
31549
+ for (int i = 0; i < 2; i++) { \
31550
+ _d[i] = _a[i] + (int64_t)_b[i + 2] * (int64_t)_c[IDX]; \
31551
+ } \
31552
+ d = vmlal_high_lane_s32(a, b, c, IDX); \
31553
+ CHECK_RESULT(validate_int64(d, _d[0], _d[1]))
31513
31554
31514
- result_t test_vmlal_high_lane_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
31555
+ IMM_2_ITER
31556
+ #undef TEST_IMPL
31557
+
31558
+ return TEST_SUCCESS;
31559
+ #else
31560
+ return TEST_UNIMPL;
31561
+ #endif // ENABLE_TEST_ALL
31562
+ }
31515
31563
31516
- result_t test_vmlal_high_lane_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
31564
+ result_t test_vmlal_high_lane_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
31565
+ #ifdef ENABLE_TEST_ALL
31566
+ const uint32_t *_a = (uint32_t *)impl.test_cases_int_pointer1;
31567
+ const uint16_t *_b = (uint16_t *)impl.test_cases_int_pointer2;
31568
+ const uint16_t *_c = (uint16_t *)impl.test_cases_int_pointer3;
31569
+ uint32_t _d[8];
31570
+ uint32x4_t a = vld1q_u32(_a);
31571
+ uint16x8_t b = vld1q_u16(_b);
31572
+ uint16x4_t c = vld1_u16(_c);
31573
+ uint32x4_t d;
31574
+
31575
+ #define TEST_IMPL(IDX) \
31576
+ for (int i = 0; i < 4; i++) { \
31577
+ _d[i] = _a[i] + (uint32_t)_b[i + 4] * (uint32_t)_c[IDX]; \
31578
+ } \
31579
+ d = vmlal_high_lane_u16(a, b, c, IDX); \
31580
+ CHECK_RESULT(validate_uint32(d, _d[0], _d[1], _d[2], _d[3]))
31581
+
31582
+ IMM_4_ITER
31583
+ #undef TEST_IMPL
31584
+
31585
+ return TEST_SUCCESS;
31586
+ #else
31587
+ return TEST_UNIMPL;
31588
+ #endif // ENABLE_TEST_ALL
31589
+ }
31590
+
31591
+ result_t test_vmlal_high_lane_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
31592
+ #ifdef ENABLE_TEST_ALL
31593
+ const uint64_t *_a = (uint64_t *)impl.test_cases_int_pointer1;
31594
+ const uint32_t *_b = (uint32_t *)impl.test_cases_int_pointer2;
31595
+ const uint32_t *_c = (uint32_t *)impl.test_cases_int_pointer3;
31596
+ uint64_t _d[4];
31597
+ uint64x2_t a = vld1q_u64(_a);
31598
+ uint32x4_t b = vld1q_u32(_b);
31599
+ uint32x2_t c = vld1_u32(_c);
31600
+ uint64x2_t d;
31601
+
31602
+ #define TEST_IMPL(IDX) \
31603
+ for (int i = 0; i < 2; i++) { \
31604
+ _d[i] = _a[i] + (uint64_t)_b[i + 2] * (uint64_t)_c[IDX]; \
31605
+ } \
31606
+ d = vmlal_high_lane_u32(a, b, c, IDX); \
31607
+ CHECK_RESULT(validate_uint64(d, _d[0], _d[1]))
31608
+
31609
+ IMM_2_ITER
31610
+ #undef TEST_IMPL
31611
+
31612
+ return TEST_SUCCESS;
31613
+ #else
31614
+ return TEST_UNIMPL;
31615
+ #endif // ENABLE_TEST_ALL
31616
+ }
31517
31617
31518
31618
result_t test_vmlal_laneq_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
31519
31619
0 commit comments