From 6296ebd45d3f916bea6bf434c1b5580441f9234a Mon Sep 17 00:00:00 2001 From: Tobias Hieta Date: Tue, 15 Jul 2025 15:59:05 +0200 Subject: [PATCH 1/9] Bump version to 21.1.0-git --- cmake/Modules/LLVMVersion.cmake | 2 +- libcxx/include/__config | 2 +- llvm/utils/gn/secondary/llvm/version.gni | 2 +- llvm/utils/lit/lit/__init__.py | 2 +- llvm/utils/mlgo-utils/mlgo/__init__.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index f14aae172f077..c12240f98e97f 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -4,7 +4,7 @@ if(NOT DEFINED LLVM_VERSION_MAJOR) set(LLVM_VERSION_MAJOR 21) endif() if(NOT DEFINED LLVM_VERSION_MINOR) - set(LLVM_VERSION_MINOR 0) + set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) set(LLVM_VERSION_PATCH 0) diff --git a/libcxx/include/__config b/libcxx/include/__config index d940461c30234..8f215bbe47928 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -28,7 +28,7 @@ // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM. // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is // defined to XXYYZZ. -# define _LIBCPP_VERSION 210000 +# define _LIBCPP_VERSION 210100 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni index 2b1a9076afe4a..ece4106de4aca 100644 --- a/llvm/utils/gn/secondary/llvm/version.gni +++ b/llvm/utils/gn/secondary/llvm/version.gni @@ -1,4 +1,4 @@ llvm_version_major = 21 -llvm_version_minor = 0 +llvm_version_minor = 1 llvm_version_patch = 0 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch" diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py index b5aa8edc03dc7..520ff22dc6fb0 100644 --- a/llvm/utils/lit/lit/__init__.py +++ b/llvm/utils/lit/lit/__init__.py @@ -2,7 +2,7 @@ __author__ = "Daniel Dunbar" __email__ = "daniel@minormatter.com" -__versioninfo__ = (21, 0, 0) +__versioninfo__ = (21, 1, 0) __version__ = ".".join(str(v) for v in __versioninfo__) + "dev" __all__ = [] diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py index d3369abae70b9..03eee0028b3cc 100644 --- a/llvm/utils/mlgo-utils/mlgo/__init__.py +++ b/llvm/utils/mlgo-utils/mlgo/__init__.py @@ -4,7 +4,7 @@ from datetime import timezone, datetime -__versioninfo__ = (20, 0, 0) +__versioninfo__ = (21, 1, 0) __version__ = ( ".".join(str(v) for v in __versioninfo__) + "dev" From 18624ae54bc979e47ad990721eb20eb9ca982a2f Mon Sep 17 00:00:00 2001 From: Martin Erhart Date: Tue, 15 Jul 2025 14:48:05 +0100 Subject: [PATCH 2/9] [mlir][SliceAnalysis] Fix stack overflow in graph regions (#139694) This analysis currently just crashes when applied to a graph region that has a use-def cycle. This PR fixes that by keeping track of the operations the DFS has already visited when following use-def edges and stopping once we visit an operation again. 
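
For illustration, here is a minimal standalone sketch of the traversal pattern this fix adopts, written in plain C++ with standard containers rather than the actual MLIR API; the Node type and all function names below are illustrative only and are not part of this patch. The idea is that the slice itself acts as the "already finished" memo, while a separate set marks the operations on the current DFS path, so a use-def cycle is cut the first time an operation would be revisited instead of recursing until the stack overflows:

    #include <unordered_set>
    #include <vector>

    struct Node { std::vector<Node *> users; };

    // Postorder, cycle-tolerant DFS: 'finished' is the memo of nodes already
    // in the slice, 'onPath' marks the nodes on the current DFS path so that
    // a use-def cycle is cut when a node would be revisited.
    static void forwardSliceImpl(Node *n, std::vector<Node *> &slice,
                                 std::unordered_set<Node *> &finished,
                                 std::unordered_set<Node *> &onPath) {
      for (Node *user : n->users) {
        if (finished.count(user) || !onPath.insert(user).second)
          continue; // already sliced, or a cycle back onto the current path
        forwardSliceImpl(user, slice, finished, onPath);
        onPath.erase(user); // unwind so 'onPath' only holds the current path
      }
      if (finished.insert(n).second)
        slice.push_back(n); // postorder: users first, then the node itself
    }

    // Entry point: seed the path set with the root before recursing,
    // mirroring how the patch seeds 'visited' with the query op.
    std::vector<Node *> getForwardSlice(Node *root) {
      std::vector<Node *> slice;
      std::unordered_set<Node *> finished, onPath{root};
      forwardSliceImpl(root, slice, finished, onPath);
      return slice;
    }

The new slicing-utils.mlir test added below exercises exactly this situation: two "slicing-test-op"s inside an isolated graph region that use each other's results.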
--- mlir/include/mlir/Analysis/SliceAnalysis.h | 10 ++-- mlir/lib/Analysis/SliceAnalysis.cpp | 65 ++++++++++++++++----- mlir/test/Dialect/Affine/slicing-utils.mlir | 23 ++++++++ 3 files changed, 79 insertions(+), 19 deletions(-) diff --git a/mlir/include/mlir/Analysis/SliceAnalysis.h b/mlir/include/mlir/Analysis/SliceAnalysis.h index d082d2d9f758b..18349d071bb2e 100644 --- a/mlir/include/mlir/Analysis/SliceAnalysis.h +++ b/mlir/include/mlir/Analysis/SliceAnalysis.h @@ -65,8 +65,9 @@ using ForwardSliceOptions = SliceOptions; /// /// The implementation traverses the use chains in postorder traversal for /// efficiency reasons: if an operation is already in `forwardSlice`, no -/// need to traverse its uses again. Since use-def chains form a DAG, this -/// terminates. +/// need to traverse its uses again. In the presence of use-def cycles in a +/// graph region, the traversal stops at the first operation that was already +/// visited (which is not added to the slice anymore). /// /// Upon return to the root call, `forwardSlice` is filled with a /// postorder list of uses (i.e. a reverse topological order). To get a proper @@ -114,8 +115,9 @@ void getForwardSlice(Value root, SetVector *forwardSlice, /// /// The implementation traverses the def chains in postorder traversal for /// efficiency reasons: if an operation is already in `backwardSlice`, no -/// need to traverse its definitions again. Since useuse-def chains form a DAG, -/// this terminates. +/// need to traverse its definitions again. In the presence of use-def cycles +/// in a graph region, the traversal stops at the first operation that was +/// already visited (which is not added to the slice anymore). /// /// Upon return to the root call, `backwardSlice` is filled with a /// postorder list of defs. This happens to be a topological order, from the diff --git a/mlir/lib/Analysis/SliceAnalysis.cpp b/mlir/lib/Analysis/SliceAnalysis.cpp index 36a9812bd7972..991c71e3f689a 100644 --- a/mlir/lib/Analysis/SliceAnalysis.cpp +++ b/mlir/lib/Analysis/SliceAnalysis.cpp @@ -26,7 +26,8 @@ using namespace mlir; static void -getForwardSliceImpl(Operation *op, SetVector *forwardSlice, +getForwardSliceImpl(Operation *op, DenseSet &visited, + SetVector *forwardSlice, const SliceOptions::TransitiveFilter &filter = nullptr) { if (!op) return; @@ -40,20 +41,41 @@ getForwardSliceImpl(Operation *op, SetVector *forwardSlice, for (Region ®ion : op->getRegions()) for (Block &block : region) for (Operation &blockOp : block) - if (forwardSlice->count(&blockOp) == 0) - getForwardSliceImpl(&blockOp, forwardSlice, filter); - for (Value result : op->getResults()) { - for (Operation *userOp : result.getUsers()) - if (forwardSlice->count(userOp) == 0) - getForwardSliceImpl(userOp, forwardSlice, filter); - } + if (forwardSlice->count(&blockOp) == 0) { + // We don't have to check if the 'blockOp' is already visited because + // there cannot be a traversal path from this nested op to the parent + // and thus a cycle cannot be closed here. We still have to mark it + // as visited to stop before visiting this operation again if it is + // part of a cycle. 
+ visited.insert(&blockOp); + getForwardSliceImpl(&blockOp, visited, forwardSlice, filter); + visited.erase(&blockOp); + } + + for (Value result : op->getResults()) + for (Operation *userOp : result.getUsers()) { + // A cycle can only occur within a basic block (not across regions or + // basic blocks) because the parent region must be a graph region, graph + // regions are restricted to always have 0 or 1 blocks, and there cannot + // be a def-use edge from a nested operation to an operation in an + // ancestor region. Therefore, we don't have to but may use the same + // 'visited' set across regions/blocks as long as we remove operations + // from the set again when the DFS traverses back from the leaf to the + // root. + if (forwardSlice->count(userOp) == 0 && visited.insert(userOp).second) + getForwardSliceImpl(userOp, visited, forwardSlice, filter); + + visited.erase(userOp); + } forwardSlice->insert(op); } void mlir::getForwardSlice(Operation *op, SetVector *forwardSlice, const ForwardSliceOptions &options) { - getForwardSliceImpl(op, forwardSlice, options.filter); + DenseSet visited; + visited.insert(op); + getForwardSliceImpl(op, visited, forwardSlice, options.filter); if (!options.inclusive) { // Don't insert the top level operation, we just queried on it and don't // want it in the results. @@ -69,8 +91,12 @@ void mlir::getForwardSlice(Operation *op, SetVector *forwardSlice, void mlir::getForwardSlice(Value root, SetVector *forwardSlice, const SliceOptions &options) { - for (Operation *user : root.getUsers()) - getForwardSliceImpl(user, forwardSlice, options.filter); + DenseSet visited; + for (Operation *user : root.getUsers()) { + visited.insert(user); + getForwardSliceImpl(user, visited, forwardSlice, options.filter); + visited.erase(user); + } // Reverse to get back the actual topological order. 
// std::reverse does not work out of the box on SetVector and I want an @@ -80,6 +106,7 @@ void mlir::getForwardSlice(Value root, SetVector *forwardSlice, } static LogicalResult getBackwardSliceImpl(Operation *op, + DenseSet &visited, SetVector *backwardSlice, const BackwardSliceOptions &options) { if (!op || op->hasTrait()) @@ -93,8 +120,12 @@ static LogicalResult getBackwardSliceImpl(Operation *op, auto processValue = [&](Value value) { if (auto *definingOp = value.getDefiningOp()) { - if (backwardSlice->count(definingOp) == 0) - return getBackwardSliceImpl(definingOp, backwardSlice, options); + if (backwardSlice->count(definingOp) == 0 && + visited.insert(definingOp).second) + return getBackwardSliceImpl(definingOp, visited, backwardSlice, + options); + + visited.erase(definingOp); } else if (auto blockArg = dyn_cast(value)) { if (options.omitBlockArguments) return success(); @@ -107,7 +138,8 @@ static LogicalResult getBackwardSliceImpl(Operation *op, if (parentOp && backwardSlice->count(parentOp) == 0) { if (parentOp->getNumRegions() == 1 && llvm::hasSingleElement(parentOp->getRegion(0).getBlocks())) { - return getBackwardSliceImpl(parentOp, backwardSlice, options); + return getBackwardSliceImpl(parentOp, visited, backwardSlice, + options); } } } else { @@ -145,7 +177,10 @@ static LogicalResult getBackwardSliceImpl(Operation *op, LogicalResult mlir::getBackwardSlice(Operation *op, SetVector *backwardSlice, const BackwardSliceOptions &options) { - LogicalResult result = getBackwardSliceImpl(op, backwardSlice, options); + DenseSet visited; + visited.insert(op); + LogicalResult result = + getBackwardSliceImpl(op, visited, backwardSlice, options); if (!options.inclusive) { // Don't insert the top level operation, we just queried on it and don't diff --git a/mlir/test/Dialect/Affine/slicing-utils.mlir b/mlir/test/Dialect/Affine/slicing-utils.mlir index 0848a924b9d96..c53667a98cfbe 100644 --- a/mlir/test/Dialect/Affine/slicing-utils.mlir +++ b/mlir/test/Dialect/Affine/slicing-utils.mlir @@ -292,3 +292,26 @@ func.func @slicing_test_multiple_return(%arg0: index) -> (index, index) { %0:2 = "slicing-test-op"(%arg0, %arg0): (index, index) -> (index, index) return %0#0, %0#1 : index, index } + +// ----- + +// FWD-LABEL: graph_region_with_cycle +// BWD-LABEL: graph_region_with_cycle +// FWDBWD-LABEL: graph_region_with_cycle +func.func @graph_region_with_cycle() { + test.isolated_graph_region { + // FWD: matched: [[V0:%.+]] = "slicing-test-op"([[V1:%.+]]) : (i1) -> i1 forward static slice: + // FWD: [[V1]] = "slicing-test-op"([[V0]]) : (i1) -> i1 + // FWD: matched: [[V1]] = "slicing-test-op"([[V0]]) : (i1) -> i1 forward static slice: + // FWD: [[V0]] = "slicing-test-op"([[V1]]) : (i1) -> i1 + + // BWD: matched: [[V0:%.+]] = "slicing-test-op"([[V1:%.+]]) : (i1) -> i1 backward static slice: + // BWD: [[V1]] = "slicing-test-op"([[V0]]) : (i1) -> i1 + // BWD: matched: [[V1]] = "slicing-test-op"([[V0]]) : (i1) -> i1 backward static slice: + // BWD: [[V0]] = "slicing-test-op"([[V1]]) : (i1) -> i1 + %0 = "slicing-test-op"(%1) : (i1) -> i1 + %1 = "slicing-test-op"(%0) : (i1) -> i1 + } + + return +} From 588b8130794f7ce53fe30237f6bf5614b7122f45 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Tue, 15 Jul 2025 14:53:05 +0100 Subject: [PATCH 3/9] [AArch64] Use correct regclass for spills of ZPR2/ZPR4 (#148806) Commit a6293228fdd5aba8c04c63f02f3d017443feb3f2 forced the register class of ZPR[24]StridedOrContiguous for spills/fills of ZPR2 and ZPR4, but this may result in issues when the regclass for the 
fill is a ZPR2/ZPR4 which would allow the register allocator to pick `z1_z2`, which is not a supported register for ZPR2StridedOrContiguous that only supports tuples of the form (strided) `z0_z8`, `z1_z9` or (contiguous, start at multiple of 2) `z0_z1`, `z2_z3`. For spills we could add a new register class that supports any of the tuple forms, but I've decided to use two pseudos similar to the fills for consistency. Fixes https://github.com/llvm/llvm-project/issues/148655 --- .../AArch64/AArch64ExpandPseudoInsts.cpp | 4 ++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 40 ++++++++++++--- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 18 ++++--- llvm/test/CodeGen/AArch64/spillfill-sve.mir | 49 ++++++++++--------- 4 files changed, 73 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 36f3a670808d4..07b36d20b0c6d 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -1591,18 +1591,22 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, "Non-writeback variants of STGloop / STZGloop should not " "survive past PrologEpilogInserter."); case AArch64::STR_ZZZZXI: + case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS: return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4); case AArch64::STR_ZZZXI: return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3); case AArch64::STR_ZZXI: + case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS: return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2); case AArch64::STR_PPXI: return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2); case AArch64::LDR_ZZZZXI: + case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS: return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4); case AArch64::LDR_ZZZXI: return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3); case AArch64::LDR_ZZXI: + case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS: return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2); case AArch64::LDR_PPXI: return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index c1474773faa76..5420545cc3cec 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2482,8 +2482,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { case AArch64::LDR_PXI: case AArch64::LDR_ZXI: case AArch64::LDR_ZZXI: + case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS: case AArch64::LDR_ZZZXI: case AArch64::LDR_ZZZZXI: + case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS: case AArch64::LDRBBui: case AArch64::LDRBui: case AArch64::LDRDui: @@ -2525,8 +2527,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { case AArch64::STR_PXI: case AArch64::STR_ZXI: case AArch64::STR_ZZXI: + case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS: case AArch64::STR_ZZZXI: case AArch64::STR_ZZZZXI: + case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS: case AArch64::STRBBui: case AArch64::STRBui: case AArch64::STRDui: @@ -4318,7 +4322,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, break; // SVE case AArch64::STR_ZZZZXI: + case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS: case AArch64::LDR_ZZZZXI: + case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS: Scale = TypeSize::getScalable(16); Width = TypeSize::getScalable(16 * 4); MinOffset = -256; @@ -4332,7 +4338,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, MaxOffset = 253; break; case AArch64::STR_ZZXI: + 
case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS: case AArch64::LDR_ZZXI: + case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS: Scale = TypeSize::getScalable(16); Width = TypeSize::getScalable(16 * 2); MinOffset = -256; @@ -5559,8 +5567,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Twov2d; Offset = false; - } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) || - AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) { + } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) { + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Unexpected register store without SVE store instructions"); + Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS; + StackID = TargetStackID::ScalableVector; + } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { assert(Subtarget.isSVEorStreamingSVEAvailable() && "Unexpected register store without SVE store instructions"); Opc = AArch64::STR_ZZXI; @@ -5584,8 +5596,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Fourv2d; Offset = false; - } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) || - AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) { + } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) { + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Unexpected register store without SVE store instructions"); + Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS; + StackID = TargetStackID::ScalableVector; + } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { assert(Subtarget.isSVEorStreamingSVEAvailable() && "Unexpected register store without SVE store instructions"); Opc = AArch64::STR_ZZZZXI; @@ -5736,8 +5752,12 @@ void AArch64InstrInfo::loadRegFromStackSlot( assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Twov2d; Offset = false; - } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) || - AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) { + } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) { + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Unexpected register load without SVE load instructions"); + Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS; + StackID = TargetStackID::ScalableVector; + } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { assert(Subtarget.isSVEorStreamingSVEAvailable() && "Unexpected register load without SVE load instructions"); Opc = AArch64::LDR_ZZXI; @@ -5761,8 +5781,12 @@ void AArch64InstrInfo::loadRegFromStackSlot( assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Fourv2d; Offset = false; - } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) || - AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) { + } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) { + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Unexpected register load without SVE load instructions"); + Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS; + StackID = TargetStackID::ScalableVector; + } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { assert(Subtarget.isSVEorStreamingSVEAvailable() && "Unexpected register load without SVE load instructions"); Opc = AArch64::LDR_ZZZZXI; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index eddb96979f7b8..0c4b4f4c3ed88 100644 --- 
a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2625,16 +2625,22 @@ let Predicates = [HasSVE_or_SME] in { // These get expanded to individual LDR_ZXI/STR_ZXI instructions in // AArch64ExpandPseudoInsts. let mayLoad = 1, hasSideEffects = 0 in { - def LDR_ZZXI : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_ZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + + def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; } let mayStore = 1, hasSideEffects = 0 in { - def STR_ZZXI : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_ZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + + def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; - def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; } let AddedComplexity = 1 in { diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir index 83c9b73c57570..2b16dd0f29ecc 100644 --- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir +++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir @@ -1,5 +1,5 @@ -# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy %s -o - | FileCheck %s -# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND +# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s --check-prefix=EXPAND --- | ; ModuleID = '' source_filename = "" @@ -14,13 +14,14 @@ define aarch64_sve_vector_pcs void @spills_fills_stack_id_virtreg_ppr_to_pnr() #1 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable } - define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #0 
{ entry: unreachable } + define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #2 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable } - define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #0 { entry: unreachable } + define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #2 { entry: unreachable } attributes #0 = { nounwind "target-features"="+sve" } attributes #1 = { nounwind "target-features"="+sve2p1" } + attributes #2 = { nounwind "target-features"="+sve,+sme2" "aarch64_pstate_sm_enabled" } ... --- @@ -318,10 +319,10 @@ registers: - { id: 0, class: zpr2 } stack: liveins: - - { reg: '$z0_z1', virtual-reg: '%0' } + - { reg: '$z1_z2', virtual-reg: '%0' } body: | bb.0.entry: - liveins: $z0_z1 + liveins: $z1_z2 ; CHECK-LABEL: name: spills_fills_stack_id_zpr2 ; CHECK: stack: @@ -329,12 +330,12 @@ body: | ; CHECK-NEXT: stack-id: scalable-vector ; EXPAND-LABEL: name: spills_fills_stack_id_zpr2 - ; EXPAND: STR_ZXI $z0, $sp, 0 - ; EXPAND: STR_ZXI $z1, $sp, 1 - ; EXPAND: $z0 = LDR_ZXI $sp, 0 - ; EXPAND: $z1 = LDR_ZXI $sp, 1 + ; EXPAND: STR_ZXI $z1, $sp, 0 + ; EXPAND: STR_ZXI $z2, $sp, 1 + ; EXPAND: $z1 = LDR_ZXI $sp, 0 + ; EXPAND: $z2 = LDR_ZXI $sp, 1 - %0:zpr2 = COPY $z0_z1 + %0:zpr2 = COPY $z1_z2 $z0_z1_z2_z3 = IMPLICIT_DEF $z4_z5_z6_z7 = IMPLICIT_DEF @@ -345,7 +346,7 @@ body: | $z24_z25_z26_z27 = IMPLICIT_DEF $z28_z29_z30_z31 = IMPLICIT_DEF - $z0_z1 = COPY %0 + $z1_z2 = COPY %0 RET_ReallyLR ... --- @@ -439,10 +440,10 @@ registers: - { id: 0, class: zpr4 } stack: liveins: - - { reg: '$z0_z1_z2_z3', virtual-reg: '%0' } + - { reg: '$z1_z2_z3_z4', virtual-reg: '%0' } body: | bb.0.entry: - liveins: $z0_z1_z2_z3 + liveins: $z1_z2_z3_z4 ; CHECK-LABEL: name: spills_fills_stack_id_zpr4 ; CHECK: stack: @@ -450,16 +451,16 @@ body: | ; CHECK-NEXT: stack-id: scalable-vector ; EXPAND-LABEL: name: spills_fills_stack_id_zpr4 - ; EXPAND: STR_ZXI $z0, $sp, 0 - ; EXPAND: STR_ZXI $z1, $sp, 1 - ; EXPAND: STR_ZXI $z2, $sp, 2 - ; EXPAND: STR_ZXI $z3, $sp, 3 - ; EXPAND: $z0 = LDR_ZXI $sp, 0 - ; EXPAND: $z1 = LDR_ZXI $sp, 1 - ; EXPAND: $z2 = LDR_ZXI $sp, 2 - ; EXPAND: $z3 = LDR_ZXI $sp, 3 + ; EXPAND: STR_ZXI $z1, $sp, 0 + ; EXPAND: STR_ZXI $z2, $sp, 1 + ; EXPAND: STR_ZXI $z3, $sp, 2 + ; EXPAND: STR_ZXI $z4, $sp, 3 + ; EXPAND: $z1 = LDR_ZXI $sp, 0 + ; EXPAND: $z2 = LDR_ZXI $sp, 1 + ; EXPAND: $z3 = LDR_ZXI $sp, 2 + ; EXPAND: $z4 = LDR_ZXI $sp, 3 - %0:zpr4 = COPY $z0_z1_z2_z3 + %0:zpr4 = COPY $z1_z2_z3_z4 $z0_z1_z2_z3 = IMPLICIT_DEF $z4_z5_z6_z7 = IMPLICIT_DEF @@ -470,7 +471,7 @@ body: | $z24_z25_z26_z27 = IMPLICIT_DEF $z28_z29_z30_z31 = IMPLICIT_DEF - $z0_z1_z2_z3 = COPY %0 + $z1_z2_z3_z4 = COPY %0 RET_ReallyLR ... --- From d1517ec62222584304951fcf63ce35d8fd0942f2 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Tue, 15 Jul 2025 14:53:47 +0100 Subject: [PATCH 4/9] [AArch64] Ensure bundle expansion of MOVPRFX gets correct implicit ops (#148824) By finalizing the bundle _after_ copying over the implicit-ops, it also adds any implicit-defs to the BUNDLE. 
Fixes https://github.com/llvm/llvm-project/issues/148645 --- .../AArch64/AArch64ExpandPseudoInsts.cpp | 2 +- .../AArch64/sve-pseudos-expand-undef.mir | 20 ++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 07b36d20b0c6d..7de66ccbf6f29 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -671,8 +671,8 @@ bool AArch64ExpandPseudo::expand_DestructiveOp( } if (PRFX) { - finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator()); transferImpOps(MI, PRFX, DOP); + finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator()); } else transferImpOps(MI, DOP, DOP); diff --git a/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir b/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir index ae70f91a4ec64..a1d615c910792 100644 --- a/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir +++ b/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir @@ -12,7 +12,7 @@ body: | bb.0: liveins: $p0, $z0 - ; CHECK: add_x + ; CHECK: name: add_x ; CHECK-NOT: MOVPRFX ; CHECK: $z0 = FADD_ZPmZ_S renamable $p0, killed $z0, renamable $z0 ; CHECK-NEXT: RET @@ -21,22 +21,36 @@ body: | ... -# CHECK: {{.*}} MSB_ZPmZZ_B {{.*}} --- name: expand_mls_to_msb body: | bb.0: + ; CHECK: name: expand_mls_to_msb + ; CHECK: {{.*}} MSB_ZPmZZ_B {{.*}} renamable $p0 = PTRUE_B 31, implicit $vg renamable $z0 = MLS_ZPZZZ_B_UNDEF killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1 RET_ReallyLR implicit $z0 ... -# CHECK: {{.*}} MAD_ZPmZZ_B {{.*}} --- name: expand_mla_to_mad body: | bb.0: + ; CHECK: name: expand_mla_to_mad + ; CHECK: {{.*}} MAD_ZPmZZ_B {{.*}} renamable $p0 = PTRUE_B 31, implicit $vg renamable $z0 = MLA_ZPZZZ_B_UNDEF killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1 RET_ReallyLR implicit $z0 ... + +--- +name: expand_transfer_implicit_defs +body: | + bb.0: + ; CHECK: name: expand_transfer_implicit_defs + ; CHECK: BUNDLE + ; CHECK-SAME: implicit-def $z0_z1_z2_z3 + liveins: $z1, $z2, $p0 + renamable $z0 = FADD_ZPZZ_D_UNDEF killed $p0, killed $z1, killed $z2, implicit-def $z0_z1_z2_z3 + RET_ReallyLR implicit $z0_z1_z2_z3 +... From 7d803c868ab96dabbd4cb47d0b3e60a78057e1b0 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Tue, 15 Jul 2025 14:59:18 +0100 Subject: [PATCH 5/9] [AArch64] Set the cache line size to 64 for the V2 and V3. (#148213) This sets the cache line size to 64 for the Neoverse V2 and V3. I've tested this with loop-interchange: it doesn't result in extra compile-times, but it does enable a lot more interchange. 
--- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 0956823346795..2409cc862f21c 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -270,6 +270,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) { break; case NeoverseV2: case NeoverseV3: + CacheLineSize = 64; EpilogueVectorizationMinVF = 8; MaxInterleaveFactor = 4; ScatterOverhead = 13; From a0895b4581bac8634596263b42d49c7f2e2d957f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 15 Jul 2025 09:00:15 -0500 Subject: [PATCH 6/9] [Frontend][OpenMP] Move isPrivatizingClause to OMP.h, NFC (#148644) --- .../Frontend/OpenMP/ConstructDecompositionT.h | 18 +----------------- llvm/include/llvm/Frontend/OpenMP/OMP.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index cdc80c88b7425..611bfe3f8aced 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -795,25 +795,9 @@ bool ConstructDecompositionT::applyClause( // assigned to which leaf constructs. // [5.2:340:33] - auto canMakePrivateCopy = [](llvm::omp::Clause id) { - switch (id) { - // Clauses with "privatization" property: - case llvm::omp::Clause::OMPC_firstprivate: - case llvm::omp::Clause::OMPC_in_reduction: - case llvm::omp::Clause::OMPC_lastprivate: - case llvm::omp::Clause::OMPC_linear: - case llvm::omp::Clause::OMPC_private: - case llvm::omp::Clause::OMPC_reduction: - case llvm::omp::Clause::OMPC_task_reduction: - return true; - default: - return false; - } - }; - bool applied = applyIf(node, [&](const auto &leaf) { return llvm::any_of(leaf.clauses, [&](const ClauseTy *n) { - return canMakePrivateCopy(n->id); + return llvm::omp::isPrivatizingClause(n->id); }); }); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.h b/llvm/include/llvm/Frontend/OpenMP/OMP.h index 35dafc6d246f0..d44c33301bde7 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.h @@ -48,6 +48,22 @@ static constexpr inline bool canHaveIterator(Clause C) { } } +// Can clause C create a private copy of a variable. +static constexpr inline bool isPrivatizingClause(Clause C) { + switch (C) { + case OMPC_firstprivate: + case OMPC_in_reduction: + case OMPC_lastprivate: + case OMPC_linear: + case OMPC_private: + case OMPC_reduction: + case OMPC_task_reduction: + return true; + default: + return false; + } +} + static constexpr unsigned FallbackVersion = 52; LLVM_ABI ArrayRef getOpenMPVersions(); From 49722f1df1ef62de3b1b671c2d4a11c08be11774 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 15 Jul 2025 10:49:23 -0700 Subject: [PATCH 7/9] [RISCV] Remove incorrect and untested FrameIndex support from SelectAddrRegImm9. (#148779) To fold a FrameIndex, we need to teach eliminateFrameIndex to respect the uimm9 range. 
(cherry picked from commit 63d099af146a19bc8fd5a791d6184125e6cc42e7) --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 666c76b21e631..880e6b0d48892 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -2936,8 +2936,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, /// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9. bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset) { - if (SelectAddrFrameIndex(Addr, Base, Offset)) - return true; + // FIXME: Support FrameIndex. Need to teach eliminateFrameIndex that only + // a 9-bit immediate can be folded. SDLoc DL(Addr); MVT VT = Addr.getSimpleValueType(); @@ -2947,8 +2947,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base, if (isUInt<9>(CVal)) { Base = Addr.getOperand(0); - if (auto *FIN = dyn_cast(Base)) - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); + // FIXME: Support FrameIndex. Need to teach eliminateFrameIndex that only + // a 9-bit immediate can be folded. Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT); return true; } From b71c9a43664101ff46fa0a46041a238d369a7784 Mon Sep 17 00:00:00 2001 From: Sudharsan Veeravalli Date: Wed, 16 Jul 2025 00:31:33 +0530 Subject: [PATCH 8/9] [RISCV] Fix issues in ORI to QC.INSBI transformation (#148809) The transformation done in #147349 was incorrect since we were not passing the input node of the `OR` instruction to the `QC.INSBI` instruction leading to the generated instruction doing the wrong thing. In order to do this we first needed to add the output register to `QC.INSBI` as being both an input and output. The code produced after the above fix will need a copy (mv) to preserve the register input to the OR instruction if it has more than one use making the transformation net neutral ( `6-byte QC.E.ORI/ORAI` vs `2-byte C.MV + 4-byte QC.INSB`I). Avoid doing the transformation if there is more than one use of the input register to the OR instruction. (cherry picked from commit d67d91a9906366585162cebf292f923a3f28c8a6) --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 10 +++- llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 5 +- .../test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll | 1 + llvm/test/CodeGen/RISCV/xqcibm-insert.ll | 53 +++++++++++++++++++ 4 files changed, 65 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 880e6b0d48892..186191abe12a2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -689,10 +689,16 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) { if (!isShiftedMask_32(C1) || isInt<12>(C1)) return false; + // INSBI will clobber the input register in N0. Bail out if we need a copy to + // preserve this value. + SDValue N0 = Node->getOperand(0); + if (!N0.hasOneUse()) + return false; + // If C1 is a shifted mask (but can't be formed as an ORI), // use a bitfield insert of -1. 
// Transform (or x, C1) - // -> (qc.insbi x, width, shift) + // -> (qc.insbi x, -1, width, shift) const unsigned Leading = llvm::countl_zero((uint32_t)C1); const unsigned Trailing = llvm::countr_zero((uint32_t)C1); const unsigned Width = 32 - Leading - Trailing; @@ -705,7 +711,7 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) { SDLoc DL(Node); MVT VT = Node->getSimpleValueType(0); - SDValue Ops[] = {CurDAG->getSignedTargetConstant(-1, DL, VT), + SDValue Ops[] = {N0, CurDAG->getSignedTargetConstant(-1, DL, VT), CurDAG->getTargetConstant(Width, DL, VT), CurDAG->getTargetConstant(Trailing, DL, VT)}; SDNode *BitIns = CurDAG->getMachineNode(RISCV::QC_INSBI, DL, VT, Ops); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 26bb1e8d17857..c7cb6e237aeac 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -845,10 +845,11 @@ let Predicates = [HasVendorXqcibi, IsRV32] in { let Predicates = [HasVendorXqcibm, IsRV32] in { let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { def QC_INSBRI : QCIRVInstRI<0b1, simm11, "qc.insbri">; - def QC_INSBI : RVInstIBase<0b001, OPC_CUSTOM_0, (outs GPRNoX0:$rd), - (ins simm5:$imm5, uimm5_plus1:$width, + def QC_INSBI : RVInstIBase<0b001, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb), + (ins GPRNoX0:$rd, simm5:$imm5, uimm5_plus1:$width, uimm5:$shamt), "qc.insbi", "$rd, $imm5, $width, $shamt"> { + let Constraints = "$rd = $rd_wb"; bits<5> imm5; bits<5> shamt; bits<5> width; diff --git a/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll b/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll index f227fa9aa423d..2fa06517508ce 100644 --- a/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll +++ b/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll @@ -105,6 +105,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind { ; ; RV32ZBBXQCIBM-LABEL: test_cttz_i16: ; RV32ZBBXQCIBM: # %bb.0: +; RV32ZBBXQCIBM-NEXT: not a0, a0 ; RV32ZBBXQCIBM-NEXT: qc.insbi a0, -1, 1, 16 ; RV32ZBBXQCIBM-NEXT: ctz a0, a0 ; RV32ZBBXQCIBM-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/xqcibm-insert.ll b/llvm/test/CodeGen/RISCV/xqcibm-insert.ll index 6b7f9ae856625..88054a691bad1 100644 --- a/llvm/test/CodeGen/RISCV/xqcibm-insert.ll +++ b/llvm/test/CodeGen/RISCV/xqcibm-insert.ll @@ -47,6 +47,29 @@ define i32 @test_insbi_mask(i32 %a) nounwind { ret i32 %or } +define i32 @test_insbi_mask_mv(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: test_insbi_mask_mv: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IXQCIBM-LABEL: test_insbi_mask_mv: +; RV32IXQCIBM: # %bb.0: +; RV32IXQCIBM-NEXT: mv a0, a1 +; RV32IXQCIBM-NEXT: qc.insbi a0, -1, 16, 0 +; RV32IXQCIBM-NEXT: ret +; +; RV32IXQCIBMZBS-LABEL: test_insbi_mask_mv: +; RV32IXQCIBMZBS: # %bb.0: +; RV32IXQCIBMZBS-NEXT: mv a0, a1 +; RV32IXQCIBMZBS-NEXT: qc.insbi a0, -1, 16, 0 +; RV32IXQCIBMZBS-NEXT: ret + %or = or i32 %b, 65535 + ret i32 %or +} + define i32 @test_insbi_shifted_mask(i32 %a) nounwind { ; RV32I-LABEL: test_insbi_shifted_mask: ; RV32I: # %bb.0: @@ -67,6 +90,36 @@ define i32 @test_insbi_shifted_mask(i32 %a) nounwind { ret i32 %or } +define i32 @test_insbi_shifted_mask_multiple_uses(i32 %a) nounwind { +; RV32I-LABEL: test_insbi_shifted_mask_multiple_uses: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 15 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: addi a0, a0, 10 +; RV32I-NEXT: xor a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IXQCIBM-LABEL: test_insbi_shifted_mask_multiple_uses: +; 
RV32IXQCIBM: # %bb.0: +; RV32IXQCIBM-NEXT: lui a1, 15 +; RV32IXQCIBM-NEXT: or a1, a1, a0 +; RV32IXQCIBM-NEXT: addi a0, a0, 10 +; RV32IXQCIBM-NEXT: xor a0, a0, a1 +; RV32IXQCIBM-NEXT: ret +; +; RV32IXQCIBMZBS-LABEL: test_insbi_shifted_mask_multiple_uses: +; RV32IXQCIBMZBS: # %bb.0: +; RV32IXQCIBMZBS-NEXT: lui a1, 15 +; RV32IXQCIBMZBS-NEXT: or a1, a1, a0 +; RV32IXQCIBMZBS-NEXT: addi a0, a0, 10 +; RV32IXQCIBMZBS-NEXT: xor a0, a0, a1 +; RV32IXQCIBMZBS-NEXT: ret + %or = or i32 %a, 61440 + %add = add i32 %a, 10 + %xor = xor i32 %or, %add + ret i32 %xor +} + define i32 @test_single_bit_set(i32 %a) nounwind { ; RV32I-LABEL: test_single_bit_set: ; RV32I: # %bb.0: From 04d4be501dc83fe411193a46c10e898898552731 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 15 Jul 2025 15:39:51 -0700 Subject: [PATCH 9/9] [libclang] Fix version for symbol clang_visitCXXMethods (#148958) Happened to spot this while looking at libclang.map for other reasons. clang_visitCXXMethods was added in LLVM 21, not LLVM 20. (cherry picked from commit 116110e1a93531a64d82f049b6e36403bc14f278) --- clang/tools/libclang/libclang.map | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/tools/libclang/libclang.map b/clang/tools/libclang/libclang.map index d140a71e771a0..49c472e3833fd 100644 --- a/clang/tools/libclang/libclang.map +++ b/clang/tools/libclang/libclang.map @@ -435,12 +435,12 @@ LLVM_20 { clang_getTypePrettyPrinted; clang_isBeforeInTranslationUnit; clang_visitCXXBaseClasses; - clang_visitCXXMethods; }; LLVM_21 { global: clang_getFullyQualifiedName; + clang_visitCXXMethods; clang_Cursor_getGCCAssemblyTemplate; clang_Cursor_isGCCAssemblyHasGoto; clang_Cursor_getGCCAssemblyNumOutputs;