@@ -9061,93 +9061,123 @@ FORCE_INLINE int64x2_t vqdmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_
9061
9061
return __riscv_vadd_vv_i64m1(a, bc_mulx2, 2);
9062
9062
}
9063
9063
9064
- FORCE_INLINE int16x4_t vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c, const int __d ) {
9065
- vint16m1_t c_dup = __riscv_vrgather_vx_i16m1(c, __d , 4);
9064
+ FORCE_INLINE int16x4_t vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c, const int lane ) {
9065
+ vint16m1_t c_dup = __riscv_vrgather_vx_i16m1(c, lane , 4);
9066
9066
return __riscv_vnmsac_vv_i16m1(a, b, c_dup, 4);
9067
9067
}
9068
9068
9069
- FORCE_INLINE int32x2_t vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c, const int __d ) {
9070
- vint32m1_t c_dup = __riscv_vrgather_vx_i32m1(c, __d , 2);
9069
+ FORCE_INLINE int32x2_t vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c, const int lane ) {
9070
+ vint32m1_t c_dup = __riscv_vrgather_vx_i32m1(c, lane , 2);
9071
9071
return __riscv_vnmsac_vv_i32m1(a, b, c_dup, 2);
9072
9072
}
9073
9073
9074
- FORCE_INLINE float32x2_t vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c, const int __d ) {
9075
- vfloat32m1_t c_dup = __riscv_vrgather_vx_f32m1(c, __d , 2);
9074
+ FORCE_INLINE float32x2_t vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c, const int lane ) {
9075
+ vfloat32m1_t c_dup = __riscv_vrgather_vx_f32m1(c, lane , 2);
9076
9076
return __riscv_vfnmsac_vv_f32m1(a, b, c_dup, 2);
9077
9077
}
9078
9078
9079
- FORCE_INLINE uint16x4_t vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c, const int __d ) {
9080
- vuint16m1_t c_dup = __riscv_vrgather_vx_u16m1(c, __d , 4);
9079
+ FORCE_INLINE uint16x4_t vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c, const int lane ) {
9080
+ vuint16m1_t c_dup = __riscv_vrgather_vx_u16m1(c, lane , 4);
9081
9081
return __riscv_vnmsac_vv_u16m1(a, b, c_dup, 4);
9082
9082
}
9083
9083
9084
- FORCE_INLINE uint32x2_t vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c, const int __d ) {
9085
- vuint32m1_t c_dup = __riscv_vrgather_vx_u32m1(c, __d , 2);
9084
+ FORCE_INLINE uint32x2_t vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c, const int lane ) {
9085
+ vuint32m1_t c_dup = __riscv_vrgather_vx_u32m1(c, lane , 2);
9086
9086
return __riscv_vnmsac_vv_u32m1(a, b, c_dup, 2);
9087
9087
}
9088
9088
9089
- FORCE_INLINE int16x8_t vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c, const int __d ) {
9090
- vint16m1_t c_dup = __riscv_vrgather_vx_i16m1(c, __d , 8);
9089
+ FORCE_INLINE int16x8_t vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c, const int lane ) {
9090
+ vint16m1_t c_dup = __riscv_vrgather_vx_i16m1(c, lane , 8);
9091
9091
return __riscv_vnmsac_vv_i16m1(a, b, c_dup, 8);
9092
9092
}
9093
9093
9094
- FORCE_INLINE int32x4_t vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c, const int __d ) {
9095
- vint32m1_t c_dup = __riscv_vrgather_vx_i32m1(c, __d , 4);
9094
+ FORCE_INLINE int32x4_t vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c, const int lane ) {
9095
+ vint32m1_t c_dup = __riscv_vrgather_vx_i32m1(c, lane , 4);
9096
9096
return __riscv_vnmsac_vv_i32m1(a, b, c_dup, 4);
9097
9097
}
9098
9098
9099
- FORCE_INLINE float32x4_t vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c, const int __d ) {
9100
- vfloat32m1_t c_dup = __riscv_vrgather_vx_f32m1(c, __d , 4);
9099
+ FORCE_INLINE float32x4_t vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c, const int lane ) {
9100
+ vfloat32m1_t c_dup = __riscv_vrgather_vx_f32m1(c, lane , 4);
9101
9101
return __riscv_vfnmsac_vv_f32m1(a, b, c_dup, 4);
9102
9102
}
9103
9103
9104
- // FORCE_INLINE int16x4_t vmls_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v, const int lane);
9104
+ FORCE_INLINE int16x4_t vmls_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t c, const int lane) {
9105
+ vint16m1_t c_dup = __riscv_vrgather_vx_i16m1(c, lane, 8);
9106
+ return __riscv_vnmsac_vv_i16m1(a, b, c_dup, 4);
9107
+ }
9105
9108
9106
- // FORCE_INLINE int16x8_t vmlsq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v, const int lane);
9109
+ FORCE_INLINE int16x8_t vmlsq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t c, const int lane) {
9110
+ vint16m1_t c_dup = __riscv_vrgather_vx_i16m1(c, lane, 8);
9111
+ return __riscv_vnmsac_vv_i16m1(a, b, c_dup, 8);
9112
+ }
9107
9113
9108
- // FORCE_INLINE int32x2_t vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v, const int lane);
9114
+ FORCE_INLINE int32x2_t vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t c, const int lane) {
9115
+ vint32m1_t c_dup = __riscv_vrgather_vx_i32m1(c, lane, 4);
9116
+ return __riscv_vnmsac_vv_i32m1(a, b, c_dup, 2);
9117
+ }
9109
9118
9110
- // FORCE_INLINE int32x4_t vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v, const int lane);
9119
+ FORCE_INLINE int32x4_t vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t c, const int lane) {
9120
+ vint32m1_t c_dup = __riscv_vrgather_vx_i32m1(c, lane, 4);
9121
+ return __riscv_vnmsac_vv_i32m1(a, b, c_dup, 4);
9122
+ }
9111
9123
9112
- // FORCE_INLINE uint16x4_t vmls_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v, const int lane);
9124
+ FORCE_INLINE uint16x4_t vmls_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t c, const int lane) {
9125
+ vuint16m1_t c_dup = __riscv_vrgather_vx_u16m1(c, lane, 8);
9126
+ return __riscv_vnmsac_vv_u16m1(a, b, c_dup, 4);
9127
+ }
9113
9128
9114
- // FORCE_INLINE uint16x8_t vmlsq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v, const int lane);
9129
+ FORCE_INLINE uint16x8_t vmlsq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c, const int lane) {
9130
+ vuint16m1_t c_dup = __riscv_vrgather_vx_u16m1(c, lane, 8);
9131
+ return __riscv_vnmsac_vv_u16m1(a, b, c_dup, 8);
9132
+ }
9115
9133
9116
- // FORCE_INLINE uint32x2_t vmls_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v, const int lane);
9134
+ FORCE_INLINE uint32x2_t vmls_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t c, const int lane) {
9135
+ vuint32m1_t c_dup = __riscv_vrgather_vx_u32m1(c, lane, 4);
9136
+ return __riscv_vnmsac_vv_u32m1(a, b, c_dup, 2);
9137
+ }
9117
9138
9118
- // FORCE_INLINE uint32x4_t vmlsq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v, const int lane);
9139
+ FORCE_INLINE uint32x4_t vmlsq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c, const int lane) {
9140
+ vuint32m1_t c_dup = __riscv_vrgather_vx_u32m1(c, lane, 4);
9141
+ return __riscv_vnmsac_vv_u32m1(a, b, c_dup, 4);
9142
+ }
9119
9143
9120
- // FORCE_INLINE float32x2_t vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v, const int lane);
9144
+ FORCE_INLINE float32x2_t vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c, const int lane) {
9145
+ vfloat32m1_t c_dup = __riscv_vrgather_vx_f32m1(c, lane, 4);
9146
+ return __riscv_vfnmsac_vv_f32m1(a, b, c_dup, 2);
9147
+ }
9121
9148
9122
- // FORCE_INLINE float32x4_t vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v, const int lane);
9149
+ FORCE_INLINE float32x4_t vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c, const int lane) {
9150
+ vfloat32m1_t c_dup = __riscv_vrgather_vx_f32m1(c, lane, 4);
9151
+ return __riscv_vfnmsac_vv_f32m1(a, b, c_dup, 4);
9152
+ }
9123
9153
9124
- FORCE_INLINE uint16x8_t vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c, const int __d ) {
9125
- vuint16m1_t c_dup = __riscv_vrgather_vx_u16m1(c, __d , 8);
9154
+ FORCE_INLINE uint16x8_t vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c, const int lane ) {
9155
+ vuint16m1_t c_dup = __riscv_vrgather_vx_u16m1(c, lane , 8);
9126
9156
return __riscv_vnmsac_vv_u16m1(a, b, c_dup, 8);
9127
9157
}
9128
9158
9129
- FORCE_INLINE uint32x4_t vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c, const int __d ) {
9130
- vuint32m1_t c_dup = __riscv_vrgather_vx_u32m1(c, __d , 4);
9159
+ FORCE_INLINE uint32x4_t vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c, const int lane ) {
9160
+ vuint32m1_t c_dup = __riscv_vrgather_vx_u32m1(c, lane , 4);
9131
9161
return __riscv_vnmsac_vv_u32m1(a, b, c_dup, 4);
9132
9162
}
9133
9163
9134
- FORCE_INLINE int32x4_t vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c, const int __d ) {
9135
- vint16m1_t c_dup = __riscv_vrgather_vx_i16m1(c, __d , 4);
9164
+ FORCE_INLINE int32x4_t vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c, const int lane ) {
9165
+ vint16m1_t c_dup = __riscv_vrgather_vx_i16m1(c, lane , 4);
9136
9166
return __riscv_vsub_vv_i32m1(a, __riscv_vlmul_trunc_v_i32m2_i32m1(__riscv_vwmul_vv_i32m2(b, c_dup, 4)), 4);
9137
9167
}
9138
9168
9139
- FORCE_INLINE int64x2_t vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c, const int __d ) {
9140
- vint32m1_t c_dup = __riscv_vrgather_vx_i32m1(c, __d , 2);
9169
+ FORCE_INLINE int64x2_t vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c, const int lane ) {
9170
+ vint32m1_t c_dup = __riscv_vrgather_vx_i32m1(c, lane , 2);
9141
9171
return __riscv_vsub_vv_i64m1(a, __riscv_vlmul_trunc_v_i64m2_i64m1(__riscv_vwmul_vv_i64m2(b, c_dup, 2)), 2);
9142
9172
}
9143
9173
9144
- FORCE_INLINE uint32x4_t vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c, const int __d ) {
9145
- vuint16m1_t c_dup = __riscv_vrgather_vx_u16m1(c, __d , 8);
9174
+ FORCE_INLINE uint32x4_t vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c, const int lane ) {
9175
+ vuint16m1_t c_dup = __riscv_vrgather_vx_u16m1(c, lane , 8);
9146
9176
return __riscv_vsub_vv_u32m1(a, __riscv_vlmul_trunc_v_u32m2_u32m1(__riscv_vwmulu_vv_u32m2(b, c_dup, 4)), 4);
9147
9177
}
9148
9178
9149
- FORCE_INLINE uint64x2_t vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c, const int __d ) {
9150
- vuint32m1_t c_dup = __riscv_vrgather_vx_u32m1(c, __d , 4);
9179
+ FORCE_INLINE uint64x2_t vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c, const int lane ) {
9180
+ vuint32m1_t c_dup = __riscv_vrgather_vx_u32m1(c, lane , 4);
9151
9181
return __riscv_vsub_vv_u64m1(a, __riscv_vlmul_trunc_v_u64m2_u64m1(__riscv_vwmulu_vv_u64m2(b, c_dup, 2)), 2);
9152
9182
}
9153
9183
0 commit comments