From d9ff90cc19c4d0714aca4e394784b21d29a0c8f1 Mon Sep 17 00:00:00 2001
From: yangzhaoxin
Date: Sat, 11 Oct 2025 11:53:01 +0800
Subject: [PATCH] [LoongArch][NFC] Pre-commit tests for flog2

---
 .../CodeGen/LoongArch/ir-instruction/flog2.ll |  32 +++
 .../LoongArch/lasx/ir-instruction/flog2.ll    | 264 ++++++++++++++++++
 .../LoongArch/lsx/ir-instruction/flog2.ll     | 162 +++++++++++
 3 files changed, 458 insertions(+)
 create mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll
 create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
 create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll

diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll
new file mode 100644
index 0000000000000..93fcd421e4bd7
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
+
+declare float @llvm.log2.f32(float)
+declare double @llvm.log2.f64(double)
+
+define float @flog2_s(float %x) nounwind {
+; LA32-LABEL: flog2_s:
+; LA32:       # %bb.0:
+; LA32-NEXT:    b log2f
+;
+; LA64-LABEL: flog2_s:
+; LA64:       # %bb.0:
+; LA64-NEXT:    pcaddu18i $t8, %call36(log2f)
+; LA64-NEXT:    jr $t8
+  %y = call float @llvm.log2.f32(float %x)
+  ret float %y
+}
+
+define double @flog2_d(double %x) nounwind {
+; LA32-LABEL: flog2_d:
+; LA32:       # %bb.0:
+; LA32-NEXT:    b log2
+;
+; LA64-LABEL: flog2_d:
+; LA64:       # %bb.0:
+; LA64-NEXT:    pcaddu18i $t8, %call36(log2)
+; LA64-NEXT:    jr $t8
+  %y = call double @llvm.log2.f64(double %x)
+  ret double %y
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
new file mode 100644
index 0000000000000..68f2e3ab488e1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64
+
+declare <8 x float> @llvm.log2.v8f32(<8 x float>)
+declare <4 x double> @llvm.log2.v4f64(<4 x double>)
+
+define void @flog2_v8f32(ptr %res, ptr %a) nounwind {
+; LA32-LABEL: flog2_v8f32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    addi.w $sp, $sp, -128
+; LA32-NEXT:    st.w $ra, $sp, 124 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 120 # 4-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    xvst $xr0, $sp, 80 # 32-byte Folded Spill
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    xvpickve.w $xr0, $xr0, 5
+; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT:    bl log2f
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT:    vst $vr0, $sp, 48 # 16-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT:    xvpickve.w $xr0, $xr0, 4
+; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT:    bl log2f
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; LA32-NEXT:    vld $vr1, $sp, 48 # 16-byte Folded Reload
+; LA32-NEXT:    vextrins.w $vr0, $vr1, 16
+; LA32-NEXT:    xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT:    xvpickve.w $xr0, $xr0, 6
+; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT:    bl log2f
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload
+; LA32-NEXT:    vextrins.w $vr1, $vr0, 32
+; LA32-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT:    xvpickve.w $xr0, $xr0, 7
+; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT:    bl log2f
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload
+; LA32-NEXT:    vextrins.w $vr1, $vr0, 48
+; LA32-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT:    xvpickve.w $xr0, $xr0, 1
+; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT:    bl log2f
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT:    xvpickve.w $xr0, $xr0, 0
+; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT:    bl log2f
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; LA32-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT:    vextrins.w $vr0, $vr1, 16
+; LA32-NEXT:    xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT:    xvpickve.w $xr0, $xr0, 2
+; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT:    bl log2f
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT:    vextrins.w $vr1, $vr0, 32
+; LA32-NEXT:    xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT:    xvpickve.w $xr0, $xr0, 3
+; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT:    bl log2f
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT:    vextrins.w $vr1, $vr0, 48
+; LA32-NEXT:    xvld $xr0, $sp, 48 # 32-byte Folded Reload
+; LA32-NEXT:    xvpermi.q $xr1, $xr0, 2
+; LA32-NEXT:    xvst $xr1, $fp, 0
+; LA32-NEXT:    ld.w $fp, $sp, 120 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 124 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 128
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: flog2_v8f32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    addi.d $sp, $sp, -128
+; LA64-NEXT:    st.d $ra, $sp, 120 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 112 # 8-byte Folded Spill
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvst $xr0, $sp, 80 # 32-byte Folded Spill
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    xvpickve.w $xr0, $xr0, 5
+; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT:    vst $vr0, $sp, 48 # 16-byte Folded Spill
+; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT:    xvpickve.w $xr0, $xr0, 4
+; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; LA64-NEXT:    vld $vr1, $sp, 48 # 16-byte Folded Reload
+; LA64-NEXT:    vextrins.w $vr0, $vr1, 16
+; LA64-NEXT:    xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT:    xvpickve.w $xr0, $xr0, 6
+; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload
+; LA64-NEXT:    vextrins.w $vr1, $vr0, 32
+; LA64-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill
+; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT:    xvpickve.w $xr0, $xr0, 7
+; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload
+; LA64-NEXT:    vextrins.w $vr1, $vr0, 48
+; LA64-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill
+; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT:    xvpickve.w $xr0, $xr0, 1
+; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT:    xvpickve.w $xr0, $xr0, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; LA64-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT:    vextrins.w $vr0, $vr1, 16
+; LA64-NEXT:    xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT:    xvpickve.w $xr0, $xr0, 2
+; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT:    vextrins.w $vr1, $vr0, 32
+; LA64-NEXT:    xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT:    xvpickve.w $xr0, $xr0, 3
+; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT:    vextrins.w $vr1, $vr0, 48
+; LA64-NEXT:    xvld $xr0, $sp, 48 # 32-byte Folded Reload
+; LA64-NEXT:    xvpermi.q $xr1, $xr0, 2
+; LA64-NEXT:    xvst $xr1, $fp, 0
+; LA64-NEXT:    ld.d $fp, $sp, 112 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 120 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 128
+; LA64-NEXT:    ret
+entry:
+  %v = load <8 x float>, ptr %a
+  %r = call <8 x float> @llvm.log2.v8f32(<8 x float> %v)
+  store <8 x float> %r, ptr %res
+  ret void
+}
+
+define void @flog2_v4f64(ptr %res, ptr %a) nounwind {
+; LA32-LABEL: flog2_v4f64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    addi.w $sp, $sp, -112
+; LA32-NEXT:    st.w $ra, $sp, 108 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 104 # 4-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    xvst $xr0, $sp, 64 # 32-byte Folded Spill
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    xvpickve.d $xr0, $xr0, 3
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA32-NEXT:    bl log2
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; LA32-NEXT:    vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA32-NEXT:    xvpickve.d $xr0, $xr0, 2
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA32-NEXT:    bl log2
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT:    vld $vr1, $sp, 32 # 16-byte Folded Reload
+; LA32-NEXT:    vextrins.d $vr0, $vr1, 16
+; LA32-NEXT:    xvst $xr0, $sp, 32 # 32-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA32-NEXT:    xvpickve.d $xr0, $xr0, 1
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA32-NEXT:    bl log2
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; LA32-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA32-NEXT:    xvpickve.d $xr0, $xr0, 0
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA32-NEXT:    bl log2
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT:    vextrins.d $vr0, $vr1, 16
+; LA32-NEXT:    xvld $xr1, $sp, 32 # 32-byte Folded Reload
+; LA32-NEXT:    xvpermi.q $xr0, $xr1, 2
+; LA32-NEXT:    xvst $xr0, $fp, 0
+; LA32-NEXT:    ld.w $fp, $sp, 104 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 108 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 112
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: flog2_v4f64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    addi.d $sp, $sp, -112
+; LA64-NEXT:    st.d $ra, $sp, 104 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 96 # 8-byte Folded Spill
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvst $xr0, $sp, 64 # 32-byte Folded Spill
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    xvpickve.d $xr0, $xr0, 3
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; LA64-NEXT:    vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA64-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA64-NEXT:    xvpickve.d $xr0, $xr0, 2
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT:    vld $vr1, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT:    vextrins.d $vr0, $vr1, 16
+; LA64-NEXT:    xvst $xr0, $sp, 32 # 32-byte Folded Spill
+; LA64-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA64-NEXT:    xvpickve.d $xr0, $xr0, 1
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; LA64-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA64-NEXT:    xvpickve.d $xr0, $xr0, 0
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT:    vextrins.d $vr0, $vr1, 16
+; LA64-NEXT:    xvld $xr1, $sp, 32 # 32-byte Folded Reload
+; LA64-NEXT:    xvpermi.q $xr0, $xr1, 2
+; LA64-NEXT:    xvst $xr0, $fp, 0
+; LA64-NEXT:    ld.d $fp, $sp, 96 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 104 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 112
+; LA64-NEXT:    ret
+entry:
+  %v = load <4 x double>, ptr %a
+  %r = call <4 x double> @llvm.log2.v4f64(<4 x double> %v)
+  store <4 x double> %r, ptr %res
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll
new file mode 100644
index 0000000000000..e5e75ec617b51
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefix=LA64
+
+declare <4 x float> @llvm.log2.v4f32(<4 x float>)
+declare <2 x double> @llvm.log2.v2f64(<2 x double>)
+
+define void @flog2_v4f32(ptr %res, ptr %a) nounwind {
+; LA32-LABEL: flog2_v4f32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    addi.w $sp, $sp, -48
+; LA32-NEXT:    st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT:    vld $vr0, $a1, 0
+; LA32-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    vreplvei.w $vr0, $vr0, 1
+; LA32-NEXT:    # kill: def $f0 killed $f0 killed $vr0
+; LA32-NEXT:    bl log2f
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT:    vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT:    vreplvei.w $vr0, $vr0, 0
+; LA32-NEXT:    # kill: def $f0 killed $f0 killed $vr0
+; LA32-NEXT:    bl log2f
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT:    vextrins.w $vr0, $vr1, 16
+; LA32-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT:    vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT:    vreplvei.w $vr0, $vr0, 2
+; LA32-NEXT:    # kill: def $f0 killed $f0 killed $vr0
+; LA32-NEXT:    bl log2f
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT:    vextrins.w $vr1, $vr0, 32
+; LA32-NEXT:    vst $vr1, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT:    vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT:    vreplvei.w $vr0, $vr0, 3
+; LA32-NEXT:    # kill: def $f0 killed $f0 killed $vr0
+; LA32-NEXT:    bl log2f
+; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT:    vextrins.w $vr1, $vr0, 48
+; LA32-NEXT:    vst $vr1, $fp, 0
+; LA32-NEXT:    ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 48
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: flog2_v4f32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    addi.d $sp, $sp, -48
+; LA64-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT:    vld $vr0, $a1, 0
+; LA64-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    vreplvei.w $vr0, $vr0, 1
+; LA64-NEXT:    # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT:    vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT:    vreplvei.w $vr0, $vr0, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT:    vextrins.w $vr0, $vr1, 16
+; LA64-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT:    vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT:    vreplvei.w $vr0, $vr0, 2
+; LA64-NEXT:    # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT:    vextrins.w $vr1, $vr0, 32
+; LA64-NEXT:    vst $vr1, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT:    vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT:    vreplvei.w $vr0, $vr0, 3
+; LA64-NEXT:    # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT:    vextrins.w $vr1, $vr0, 48
+; LA64-NEXT:    vst $vr1, $fp, 0
+; LA64-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 48
+; LA64-NEXT:    ret
+entry:
+  %v = load <4 x float>, ptr %a
+  %r = call <4 x float> @llvm.log2.v4f32(<4 x float> %v)
+  store <4 x float> %r, ptr %res
+  ret void
+}
+
+define void @flog2_v2f64(ptr %res, ptr %a) nounwind {
+; LA32-LABEL: flog2_v2f64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    addi.w $sp, $sp, -48
+; LA32-NEXT:    st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT:    vld $vr0, $a1, 0
+; LA32-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    vreplvei.d $vr0, $vr0, 1
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $vr0
+; LA32-NEXT:    bl log2
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; LA32-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA32-NEXT:    vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT:    vreplvei.d $vr0, $vr0, 0
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $vr0
+; LA32-NEXT:    bl log2
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; LA32-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT:    vextrins.d $vr0, $vr1, 16
+; LA32-NEXT:    vst $vr0, $fp, 0
+; LA32-NEXT:    ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 48
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: flog2_v2f64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    addi.d $sp, $sp, -48
+; LA64-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT:    vld $vr0, $a1, 0
+; LA64-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    vreplvei.d $vr0, $vr0, 1
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $vr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; LA64-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT:    vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT:    vreplvei.d $vr0, $vr0, 0
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $vr0
+; LA64-NEXT:    pcaddu18i $ra, %call36(log2)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; LA64-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT:    vextrins.d $vr0, $vr1, 16
+; LA64-NEXT:    vst $vr0, $fp, 0
+; LA64-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 48
+; LA64-NEXT:    ret
entry:
+  %v = load <2 x double>, ptr %a
+  %r = call <2 x double> @llvm.log2.v2f64(<2 x double> %v)
+  store <2 x double> %r, ptr %res
+  ret void
+}