Skip to content

Conversation

tangaac
Copy link
Contributor

@tangaac tangaac commented Aug 29, 2025

PR #135896 introduced the [x]vldrepl instructions without handling extending loads (sext/zext).
This patch fixes that.

@llvmbot
Copy link
Member

llvmbot commented Aug 29, 2025

@llvm/pr-subscribers-backend-loongarch

Author: None (tangaac)

Changes

PR #135896 introduced the [x]vldrepl instructions without handling extending loads (sext/zext).
This patch fixes that.


Full diff: https://github.com/llvm/llvm-project/pull/155960.diff

3 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+2-1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll (+38-6)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll (+39-6)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ffb6c2980026f..478c335c3f07e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2471,8 +2471,9 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
   if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
     return SDValue();
 
-  if (IsIdeneity) {
+  if (IsIdeneity && ISD::isNON_EXTLoad(IdentitySrc.getNode())) {
     auto *LN = cast<LoadSDNode>(IdentitySrc);
+
     SDVTList Tys =
         LN->isIndexed()
             ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
index 976924bdca686..89592a0886cc1 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
@@ -18,6 +18,32 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
   ret <4 x i64> %tmp2
 }
 
+define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = sext i8 %tmp to i16
+  %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+  ret <16 x i16> %tmp3
+}
+
+define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.bu $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = zext i8 %tmp to i16
+  %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+  ret <16 x i16> %tmp3
+}
+
 define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_d_unaligned_offset:
 ; CHECK:       # %bb.0:
@@ -34,7 +60,8 @@ define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
 define <32 x i8> @xvldrepl_b(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_b:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.b $xr0, $a0, 0
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.b $xr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i8, ptr %ptr
   %tmp1 = insertelement <32 x i8> zeroinitializer, i8 %tmp, i32 0
@@ -45,7 +72,8 @@ define <32 x i8> @xvldrepl_b(ptr %ptr) {
 define <32 x i8> @xvldrepl_b_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_b_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.b $xr0, $a0, 33
+; CHECK-NEXT:    ld.b $a0, $a0, 33
+; CHECK-NEXT:    xvreplgr2vr.b $xr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i8, ptr %ptr, i64 33
   %tmp = load i8, ptr %p
@@ -58,7 +86,8 @@ define <32 x i8> @xvldrepl_b_offset(ptr %ptr) {
 define <16 x i16> @xvldrepl_h(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.h $xr0, $a0, 0
+; CHECK-NEXT:    ld.h $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i16, ptr %ptr
   %tmp1 = insertelement <16 x i16> zeroinitializer, i16 %tmp, i32 0
@@ -69,7 +98,8 @@ define <16 x i16> @xvldrepl_h(ptr %ptr) {
 define <16 x i16> @xvldrepl_h_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_h_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.h $xr0, $a0, 66
+; CHECK-NEXT:    ld.h $a0, $a0, 66
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i16, ptr %ptr, i64 33
   %tmp = load i16, ptr %p
@@ -81,7 +111,8 @@ define <16 x i16> @xvldrepl_h_offset(ptr %ptr) {
 define <8 x i32> @xvldrepl_w(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_w:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.w $xr0, $a0, 0
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i32, ptr %ptr
   %tmp1 = insertelement <8 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -92,7 +123,8 @@ define <8 x i32> @xvldrepl_w(ptr %ptr) {
 define <8 x i32> @xvldrepl_w_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_w_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.w $xr0, $a0, 132
+; CHECK-NEXT:    ld.w $a0, $a0, 132
+; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i32, ptr %ptr, i64 33
   %tmp = load i32, ptr %p
diff --git a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
index c46747ef30509..a8cddbf9e6400 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
@@ -18,6 +18,32 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
   ret <2 x i64> %tmp2
 }
 
+define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = sext i8 %tmp to i16
+  %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.bu $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = zext i8 %tmp to i16
+  %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %tmp3
+}
+
 define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_d_unaligned_offset:
 ; CHECK:       # %bb.0:
@@ -34,7 +60,8 @@ define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
 define <16 x i8> @vldrepl_b(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_b:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.b $vr0, $a0, 0
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.b $vr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i8, ptr %ptr
   %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %tmp, i32 0
@@ -45,7 +72,8 @@ define <16 x i8> @vldrepl_b(ptr %ptr) {
 define <16 x i8> @vldrepl_b_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_b_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.b $vr0, $a0, 33
+; CHECK-NEXT:    ld.b $a0, $a0, 33
+; CHECK-NEXT:    vreplgr2vr.b $vr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i8, ptr %ptr, i64 33
   %tmp = load i8, ptr %p
@@ -58,7 +86,8 @@ define <16 x i8> @vldrepl_b_offset(ptr %ptr) {
 define <8 x i16> @vldrepl_h(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.h $vr0, $a0, 0
+; CHECK-NEXT:    ld.h $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i16, ptr %ptr
   %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %tmp, i32 0
@@ -69,7 +98,8 @@ define <8 x i16> @vldrepl_h(ptr %ptr) {
 define <8 x i16> @vldrepl_h_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_h_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.h $vr0, $a0, 66
+; CHECK-NEXT:    ld.h $a0, $a0, 66
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i16, ptr %ptr, i64 33
   %tmp = load i16, ptr %p
@@ -81,7 +111,8 @@ define <8 x i16> @vldrepl_h_offset(ptr %ptr) {
 define <4 x i32> @vldrepl_w(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_w:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.w $vr0, $a0, 0
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.w $vr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i32, ptr %ptr
   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -92,7 +123,8 @@ define <4 x i32> @vldrepl_w(ptr %ptr) {
 define <4 x i32> @vldrepl_w_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_w_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.w $vr0, $a0, 132
+; CHECK-NEXT:    ld.w $a0, $a0, 132
+; CHECK-NEXT:    vreplgr2vr.w $vr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i32, ptr %ptr, i64 33
   %tmp = load i32, ptr %p
@@ -169,3 +201,4 @@ define <2 x double> @vldrepl_d_dbl_offset(ptr %ptr) {
   %tmp2 = shufflevector <2 x double> %tmp1, <2 x double> poison, <2 x i32> zeroinitializer
   ret <2 x double> %tmp2
 }
+

auto *LN = cast<LoadSDNode>(IdentitySrc);
auto ExtType = LN->getExtensionType();

if (IsIdeneity && (ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think it makes sense to precisely match the sizes of VT.getVectorElementVT() and LN->getMemoryVT()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It matches cases like these:

    t2: i64,ch = CopyFromReg t0, Register:i64 %0
  t16: i64,ch = load<(load (s8) from %ir.ptr), anyext from i8> t0, t2, undef:i64


    t2: i64,ch = CopyFromReg t0, Register:i64 %0
  t5: i64,ch = load<(load (s64) from %ir.ptr)> t0, t2, undef:i64

@zhaoqi5
Copy link
Contributor

zhaoqi5 commented Aug 29, 2025

The TODO at the top of the test files can probably be deleted.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
Status: Needs Triage
Development

Successfully merging this pull request may close these issues.

5 participants