Skip to content

Commit fc3c868

Browse files
authored
pulley: Fill out most remaining simd float ops (bytecodealliance#9884)
* pulley: Fill out most remaining simd float ops
  Get most simd/float-related tests passing, mostly by reusing preexisting scalar ops for the simd implementation.
* Fix fma test on MinGW
  prtest:full
* More MinGW fixes
1 parent a252039 commit fc3c868

23 files changed

+247
-17
lines changed

cranelift/codegen/src/isa/pulley_shared/lower.isle

+36-2
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,9 @@
756756
(rule (lower (fcmp cc a b @ (value_type (ty_scalar_float ty))))
757757
(lower_fcmp ty cc a b))
758758

759+
(rule 1 (lower (fcmp cc a b @ (value_type (ty_vec128 ty))))
760+
(lower_vfcmp ty cc a b))
761+
759762
(decl lower_fcmp (Type FloatCC Value Value) XReg)
760763

761764
(rule (lower_fcmp $F32 (FloatCC.Equal) a b) (pulley_feq32 a b))
@@ -787,6 +790,32 @@
787790
(if-let true (floatcc_unordered cc))
788791
(pulley_xbxor32_s8 (lower_fcmp ty (floatcc_complement cc) a b) 1))
789792

793+
(decl lower_vfcmp (Type FloatCC Value Value) VReg)
794+
795+
(rule (lower_vfcmp $F32X4 (FloatCC.Equal) a b) (pulley_veqf32x4 a b))
796+
(rule (lower_vfcmp $F64X2 (FloatCC.Equal) a b) (pulley_veqf64x2 a b))
797+
(rule (lower_vfcmp $F32X4 (FloatCC.NotEqual) a b) (pulley_vneqf32x4 a b))
798+
(rule (lower_vfcmp $F64X2 (FloatCC.NotEqual) a b) (pulley_vneqf64x2 a b))
799+
(rule (lower_vfcmp $F32X4 (FloatCC.LessThan) a b) (pulley_vltf32x4 a b))
800+
(rule (lower_vfcmp $F64X2 (FloatCC.LessThan) a b) (pulley_vltf64x2 a b))
801+
(rule (lower_vfcmp $F32X4 (FloatCC.LessThanOrEqual) a b) (pulley_vlteqf32x4 a b))
802+
(rule (lower_vfcmp $F64X2 (FloatCC.LessThanOrEqual) a b) (pulley_vlteqf64x2 a b))
803+
804+
(rule (lower_vfcmp ty (FloatCC.Unordered) a b)
805+
(pulley_vbor128
806+
(lower_vfcmp ty (FloatCC.NotEqual) a a)
807+
(lower_vfcmp ty (FloatCC.NotEqual) b b)))
808+
809+
;; NB: Pulley doesn't have lowerings for `Ordered` or `UnorderedOr*` `FloatCC`
810+
;; conditions as that's not needed by wasm at this time.
811+
812+
;; Pulley doesn't have instructions for `>` and `>=`, so we have to reverse the
813+
;; operation.
814+
(rule (lower_vfcmp ty (FloatCC.GreaterThan) a b)
815+
(lower_vfcmp ty (FloatCC.LessThan) b a))
816+
(rule (lower_vfcmp ty (FloatCC.GreaterThanOrEqual) a b)
817+
(lower_vfcmp ty (FloatCC.LessThanOrEqual) b a))
818+
790819
;;;; Rules for `load` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
791820

792821
(decl amode (Value Offset32) Amode)
@@ -1203,6 +1232,7 @@
12031232
(pulley_vfloor32x4 a))
12041233
(rule (lower (has_type $F64X2 (floor a)))
12051234
(pulley_vfloor64x2 a))
1235+
12061236
;;;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12071237

12081238
(rule (lower (has_type $F32 (ceil a))) (pulley_fceil32 a))
@@ -1230,7 +1260,6 @@
12301260
(rule (lower (has_type $F64X2 (sqrt a)))
12311261
(pulley_vsqrt64x2 a))
12321262

1233-
12341263
;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12351264

12361265
(rule (lower (has_type $F32 (fneg a))) (pulley_fneg32 a))
@@ -1407,11 +1436,16 @@
14071436
(rule (lower (scalar_to_vector a @ (value_type $F64)))
14081437
(pulley_vinsertf64 (pulley_vconst128 0) a 0))
14091438

1410-
;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1439+
;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14111440

14121441
(rule (lower (has_type $I8X16 (shuffle a b (u128_from_immediate mask))))
14131442
(pulley_vshuffle a b mask))
14141443

14151444
;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14161445

14171446
(rule 1 (lower (has_type $I8X16 (swizzle a b))) (pulley_vswizzlei8x16 a b))
1447+
1448+
;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1449+
1450+
(rule (lower (has_type $F32X4 (fma a b c))) (pulley_vfma32x4 a b c))
1451+
(rule (lower (has_type $F64X2 (fma a b c))) (pulley_vfma64x2 a b c))

cranelift/filetests/filetests/runtests/simd-fadd-splat.clif

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ target x86_64 sse42 has_avx
88
set enable_multi_ret_implicit_sret
99
target riscv64 has_v
1010
target riscv64 has_v has_c has_zcb
11+
target pulley32
12+
target pulley32be
13+
target pulley64
14+
target pulley64be
1115

1216
function %splat_f32x4_2(f32x4) -> f32x4 {
1317
block0(v0: f32x4):

cranelift/filetests/filetests/runtests/simd-fadd.clif

+4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ target x86_64 sse42 has_avx
99
set enable_multi_ret_implicit_sret
1010
target riscv64 has_v
1111
target riscv64 has_v has_c has_zcb
12+
target pulley32
13+
target pulley32be
14+
target pulley64
15+
target pulley64be
1216

1317

1418
function %fadd_f32x4(f32x4, f32x4) -> f32x4 {

cranelift/filetests/filetests/runtests/simd-fcmp-eq.clif

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
66
set enable_multi_ret_implicit_sret
77
target riscv64 has_v
88
target riscv64 has_v has_c has_zcb
9+
target pulley32
10+
target pulley32be
11+
target pulley64
12+
target pulley64be
913

1014
function %simd_fcmp_eq_f32(f32x4, f32x4) -> i32x4 {
1115
block0(v0: f32x4, v1: f32x4):

cranelift/filetests/filetests/runtests/simd-fcmp-ge.clif

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
66
set enable_multi_ret_implicit_sret
77
target riscv64 has_v
88
target riscv64 has_v has_c has_zcb
9+
target pulley32
10+
target pulley32be
11+
target pulley64
12+
target pulley64be
913

1014
function %simd_fcmp_ge_f32(f32x4, f32x4) -> i32x4 {
1115
block0(v0: f32x4, v1: f32x4):

cranelift/filetests/filetests/runtests/simd-fcmp-gt.clif

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
66
set enable_multi_ret_implicit_sret
77
target riscv64 has_v
88
target riscv64 has_v has_c has_zcb
9+
target pulley32
10+
target pulley32be
11+
target pulley64
12+
target pulley64be
913

1014
function %simd_fcmp_gt_f32(f32x4, f32x4) -> i32x4 {
1115
block0(v0: f32x4, v1: f32x4):

cranelift/filetests/filetests/runtests/simd-fcmp-le.clif

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
66
set enable_multi_ret_implicit_sret
77
target riscv64 has_v
88
target riscv64 has_v has_c has_zcb
9+
target pulley32
10+
target pulley32be
11+
target pulley64
12+
target pulley64be
913

1014
function %simd_fcmp_le_f32(f32x4, f32x4) -> i32x4 {
1115
block0(v0: f32x4, v1: f32x4):

cranelift/filetests/filetests/runtests/simd-fcmp-lt.clif

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
66
set enable_multi_ret_implicit_sret
77
target riscv64 has_v
88
target riscv64 has_v has_c has_zcb
9+
target pulley32
10+
target pulley32be
11+
target pulley64
12+
target pulley64be
913

1014
function %simd_fcmp_lt_f32(f32x4, f32x4) -> i32x4 {
1115
block0(v0: f32x4, v1: f32x4):

cranelift/filetests/filetests/runtests/simd-fcmp-ne.clif

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
66
set enable_multi_ret_implicit_sret
77
target riscv64 has_v
88
target riscv64 has_v has_c has_zcb
9+
target pulley32
10+
target pulley32be
11+
target pulley64
12+
target pulley64be
913

1014
function %simd_fcmp_ne_f32(f32x4, f32x4) -> i32x4 {
1115
block0(v0: f32x4, v1: f32x4):

cranelift/filetests/filetests/runtests/simd-fcmp-uno.clif

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
66
set enable_multi_ret_implicit_sret
77
target riscv64 has_v
88
target riscv64 has_v has_c has_zcb
9+
target pulley32
10+
target pulley32be
11+
target pulley64
12+
target pulley64be
913

1014
function %simd_fcmp_uno_f32(f32x4, f32x4) -> i32x4 {
1115
block0(v0: f32x4, v1: f32x4):

cranelift/filetests/filetests/runtests/simd-fdiv.clif

+4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ target x86_64 sse42 has_avx
99
set enable_multi_ret_implicit_sret
1010
target riscv64 has_v
1111
target riscv64 has_v has_c has_zcb
12+
target pulley32
13+
target pulley32be
14+
target pulley64
15+
target pulley64be
1216

1317

1418
function %fdiv_f32x4(f32x4, f32x4) -> f32x4 {

cranelift/filetests/filetests/runtests/simd-floor.clif

+4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ target s390x
99
set enable_multi_ret_implicit_sret
1010
target riscv64 has_v
1111
target riscv64 has_v has_c has_zcb
12+
target pulley32
13+
target pulley32be
14+
target pulley64
15+
target pulley64be
1216

1317
function %floor_f32x4(f32x4) -> f32x4 {
1418
block0(v0: f32x4):

cranelift/filetests/filetests/runtests/simd-fma-neg.clif

+4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ target aarch64
55
set enable_multi_ret_implicit_sret
66
target riscv64 has_v
77
target riscv64 has_v has_c has_zcb
8+
target pulley32
9+
target pulley32be
10+
target pulley64
11+
target pulley64be
812

913
;; This file is not enabled in the interpreter since SIMD fneg is currently broken
1014
;; there.

cranelift/filetests/filetests/runtests/simd-fma.clif

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ target aarch64
66
set enable_multi_ret_implicit_sret
77
target riscv64 has_v
88
target riscv64 has_v has_c has_zcb
9+
target pulley32
10+
target pulley32be
11+
target pulley64
12+
target pulley64be
913

1014
function %fma_f32x4(f32x4, f32x4, f32x4) -> f32x4 {
1115
block0(v0: f32x4, v1: f32x4, v2: f32x4):

cranelift/filetests/filetests/runtests/simd-fmin-max-pseudo.clif

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ target x86_64 skylake
66
set enable_multi_ret_implicit_sret
77
target riscv64 has_v
88
target riscv64 has_v has_c has_zcb
9+
target pulley32
10+
target pulley32be
11+
target pulley64
12+
target pulley64be
913

1014
function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 {
1115
block0(v0:f32x4, v1:f32x4):

cranelift/filetests/filetests/runtests/simd-fmul.clif

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ target x86_64 sse42 has_avx
88
set enable_multi_ret_implicit_sret
99
target riscv64 has_v
1010
target riscv64 has_v has_c has_zcb
11+
target pulley32
12+
target pulley32be
13+
target pulley64
14+
target pulley64be
1115

1216

1317
function %fmul_f32x4(f32x4, f32x4) -> f32x4 {

cranelift/filetests/filetests/runtests/simd-fneg.clif

+4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ target x86_64 sse42 has_avx
99
set enable_multi_ret_implicit_sret
1010
target riscv64 has_v
1111
target riscv64 has_v has_c has_zcb
12+
target pulley32
13+
target pulley32be
14+
target pulley64
15+
target pulley64be
1216

1317
function %fneg_f32x4(f32x4) -> f32x4 {
1418
block0(v0: f32x4):

cranelift/filetests/filetests/runtests/simd-fsub.clif

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ target x86_64 sse42 has_avx
88
set enable_multi_ret_implicit_sret
99
target riscv64 has_v
1010
target riscv64 has_v has_c has_zcb
11+
target pulley32
12+
target pulley32be
13+
target pulley64
14+
target pulley64be
1115

1216

1317
function %fsub_f32x4(f32x4, f32x4) -> f32x4 {

crates/math/src/lib.rs

+9-5
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ pub trait WasmFloat {
3131
fn wasm_nearest(self) -> Self;
3232
fn wasm_minimum(self, other: Self) -> Self;
3333
fn wasm_maximum(self, other: Self) -> Self;
34-
fn mul_add(self, b: Self, c: Self) -> Self;
34+
fn wasm_mul_add(self, b: Self, c: Self) -> Self;
3535
}
3636

3737
impl WasmFloat for f32 {
@@ -148,9 +148,11 @@ impl WasmFloat for f32 {
148148
}
149149
}
150150
#[inline]
151-
fn mul_add(self, b: f32, c: f32) -> f32 {
151+
fn wasm_mul_add(self, b: f32, c: f32) -> f32 {
152+
// The MinGW implementation of `fma` differs from other platforms, so
153+
// favor `libm` there instead.
152154
#[cfg(feature = "std")]
153-
if true {
155+
if !(cfg!(windows) && cfg!(target_env = "gnu")) {
154156
return self.mul_add(b, c);
155157
}
156158
libm::fmaf(self, b, c)
@@ -271,9 +273,11 @@ impl WasmFloat for f64 {
271273
}
272274
}
273275
#[inline]
274-
fn mul_add(self, b: f64, c: f64) -> f64 {
276+
fn wasm_mul_add(self, b: f64, c: f64) -> f64 {
277+
// The MinGW implementation of `fma` differs from other platforms, so
278+
// favor `libm` there instead.
275279
#[cfg(feature = "std")]
276-
if true {
280+
if !(cfg!(windows) && cfg!(target_env = "gnu")) {
277281
return self.mul_add(b, c);
278282
}
279283
libm::fma(self, b, c)

crates/wasmtime/src/runtime/vm/libcalls.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1289,11 +1289,11 @@ pub mod relocs {
12891289
}
12901290

12911291
pub extern "C" fn fmaf32(a: f32, b: f32, c: f32) -> f32 {
1292-
wasmtime_math::WasmFloat::mul_add(a, b, c)
1292+
wasmtime_math::WasmFloat::wasm_mul_add(a, b, c)
12931293
}
12941294

12951295
pub extern "C" fn fmaf64(a: f64, b: f64, c: f64) -> f64 {
1296-
wasmtime_math::WasmFloat::mul_add(a, b, c)
1296+
wasmtime_math::WasmFloat::wasm_mul_add(a, b, c)
12971297
}
12981298

12991299
// This intrinsic is only used on x86_64 platforms as an implementation of

crates/wast-util/src/lib.rs

-8
Original file line numberDiff line numberDiff line change
@@ -401,17 +401,9 @@ impl WastTest {
401401
// features in Pulley are implemented.
402402
if config.compiler == Compiler::CraneliftPulley {
403403
let unsupported = [
404-
"misc_testsuite/simd/canonicalize-nan.wast",
405-
"misc_testsuite/simd/issue_3327_bnot_lowering.wast",
406404
"misc_testsuite/simd/v128-select.wast",
407405
"spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast",
408-
"spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
409-
"spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast",
410406
"spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
411-
"spec_testsuite/simd_f32x4_cmp.wast",
412-
"spec_testsuite/simd_f32x4_pmin_pmax.wast",
413-
"spec_testsuite/simd_f64x2_cmp.wast",
414-
"spec_testsuite/simd_f64x2_pmin_pmax.wast",
415407
"spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast",
416408
"spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast",
417409
"spec_testsuite/simd_load.wast",

0 commit comments

Comments
 (0)