Skip to content

Commit 62e9ed1

Browse files
authored
Merge pull request #454 from howjmay/vmul_lane_f64
feat: Add vmul[q]_lane_f64
2 parents 956c853 + ebbc303 commit 62e9ed1

File tree

3 files changed

+50
-6
lines changed

3 files changed

+50
-6
lines changed

neon2rvv.h

+8-2
Original file line number | Diff line number | Diff line change
@@ -8775,9 +8775,15 @@ FORCE_INLINE float32x4_t vmulq_lane_f32(float32x4_t a, float32x2_t b, const int
8775 8775
return __riscv_vfmul_vv_f32m1(a, b_dup_lane, 4);
8776 8776
}
8777 8777

8778-
// FORCE_INLINE float64x1_t vmul_lane_f64(float64x1_t a, float64x1_t v, const int lane);
8778+
FORCE_INLINE float64x1_t vmul_lane_f64(float64x1_t a, float64x1_t b, const int lane) {
8779+
vfloat64m1_t b_dup_lane = __riscv_vrgather_vx_f64m1(b, lane, 1);
8780+
return __riscv_vfmul_vv_f64m1(a, b_dup_lane, 1);
8781+
}
8779 8782

8780-
// FORCE_INLINE float64x2_t vmulq_lane_f64(float64x2_t a, float64x1_t v, const int lane);
8783+
FORCE_INLINE float64x2_t vmulq_lane_f64(float64x2_t a, float64x1_t b, const int lane) {
8784+
vfloat64m1_t b_dup_lane = __riscv_vrgather_vx_f64m1(b, lane, 2);
8785+
return __riscv_vfmul_vv_f64m1(a, b_dup_lane, 2);
8786+
}
8781 8787

8782 8788
// FORCE_INLINE float32_t vmuls_lane_f32(float32_t a, float32x2_t v, const int lane);
8783 8789

tests/impl.cpp

+40-2
Original file line number | Diff line number | Diff line change
@@ -31083,9 +31083,47 @@ result_t test_vmulq_lane_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
31083 31083
#endif // ENABLE_TEST_ALL
31084 31084
}
31085 31085

31086-
result_t test_vmul_lane_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
31086+
result_t test_vmul_lane_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
31087+
#ifdef ENABLE_TEST_ALL
31088+
const double *_a = (double *)impl.test_cases_float_pointer1;
31089+
const double *_b = (double *)impl.test_cases_float_pointer2;
31090+
double _c[2];
31091+
float64x1_t a = vld1_f64(_a);
31092+
float64x1_t b = vld1_f64(_b);
31093+
float64x1_t c;
31094+
31095+
for (int i = 0; i < 1; i++) {
31096+
_c[i] = _a[i] * _b[0];
31097+
}
31098+
c = vmul_lane_f64(a, b, 0);
31099+
return validate_double(c, _c[0]);
31100+
31101+
return TEST_SUCCESS;
31102+
31103+
#else
31104+
return TEST_UNIMPL;
31105+
#endif // ENABLE_TEST_ALL
31106+
}
31107+
31108+
result_t test_vmulq_lane_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
31109+
#ifdef ENABLE_TEST_ALL
31110+
const double *_a = (double *)impl.test_cases_float_pointer1;
31111+
const double *_b = (double *)impl.test_cases_float_pointer2;
31112+
double _c[4];
31113+
float64x2_t a = vld1q_f64(_a);
31114+
float64x1_t b = vld1_f64(_b);
31115+
float64x2_t c;
31087 31116

31088-
result_t test_vmulq_lane_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
31117+
for (int i = 0; i < 2; i++) {
31118+
_c[i] = _a[i] * _b[0];
31119+
}
31120+
c = vmulq_lane_f64(a, b, 0);
31121+
return validate_double(c, _c[0], _c[1]);
31122+
31123+
#else
31124+
return TEST_UNIMPL;
31125+
#endif // ENABLE_TEST_ALL
31126+
}
31089 31127

31090 31128
result_t test_vmuls_lane_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
31091 31129

tests/impl.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -1890,8 +1890,8 @@
1890 1890
_(vmulq_lane_s16) \
1891 1891
_(vmulq_lane_s32) \
1892 1892
_(vmulq_lane_f32) \
1893-
/*_(vmul_lane_f64) */ \
1894-
/*_(vmulq_lane_f64) */ \
1893+
_(vmul_lane_f64) \
1894+
_(vmulq_lane_f64) \
1895 1895
/*_(vmuls_lane_f32) */ \
1896 1896
/*_(vmuld_lane_f64) */ \
1897 1897
/*_(vmul_laneq_s16) */ \

0 commit comments

Comments (0)