diff --git a/clang/tools/libclang/libclang.map b/clang/tools/libclang/libclang.map
index fabacecab50c9..39b8e690a710d 100644
--- a/clang/tools/libclang/libclang.map
+++ b/clang/tools/libclang/libclang.map
@@ -568,7 +568,6 @@ LLVM_20 {
     clang_getTypePrettyPrinted;
     clang_isBeforeInTranslationUnit;
     clang_visitCXXBaseClasses;
-    clang_visitCXXMethods;
 };
 
 LLVM_21 {
@@ -580,6 +579,7 @@ LLVM_21 {
     clang_experimental_DependencyScannerReproducerOptions_create;
     clang_experimental_DependencyScannerReproducerOptions_dispose;
     clang_experimental_DependencyScanner_generateReproducer;
+    clang_visitCXXMethods;
     clang_Cursor_getGCCAssemblyTemplate;
     clang_Cursor_isGCCAssemblyHasGoto;
     clang_Cursor_getGCCAssemblyNumOutputs;
diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index f14aae172f077..c12240f98e97f 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -4,7 +4,7 @@ if(NOT DEFINED LLVM_VERSION_MAJOR)
   set(LLVM_VERSION_MAJOR 21)
 endif()
 if(NOT DEFINED LLVM_VERSION_MINOR)
-  set(LLVM_VERSION_MINOR 0)
+  set(LLVM_VERSION_MINOR 1)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
   set(LLVM_VERSION_PATCH 0)
diff --git a/libcxx/include/__config b/libcxx/include/__config
index d940461c30234..8f215bbe47928 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -28,7 +28,7 @@
 // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM.
 // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is
 // defined to XXYYZZ.
-#  define _LIBCPP_VERSION 210000
+#  define _LIBCPP_VERSION 210100
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
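
The _LIBCPP_VERSION bump above follows the XXYYZZ encoding spelled out in the comment; a standalone sanity check (illustrative only, not part of the patch):

    // Standalone check: _LIBCPP_VERSION encodes LLVM XX.YY.ZZ as the decimal
    // number XXYYZZ, so the 21.0.0 -> 21.1.0 bump moves the macro from
    // 210000 to 210100.
    constexpr long libcppVersion(int major, int minor, int patch) {
      return major * 10000 + minor * 100 + patch;
    }
    static_assert(libcppVersion(21, 0, 0) == 210000, "old value");
    static_assert(libcppVersion(21, 1, 0) == 210100, "new value");
    int main() { return 0; }
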
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index cdc80c88b7425..611bfe3f8aced 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -795,25 +795,9 @@ bool ConstructDecompositionT<C, H>::applyClause(
   // assigned to which leaf constructs.
   // [5.2:340:33]
 
-  auto canMakePrivateCopy = [](llvm::omp::Clause id) {
-    switch (id) {
-    // Clauses with "privatization" property:
-    case llvm::omp::Clause::OMPC_firstprivate:
-    case llvm::omp::Clause::OMPC_in_reduction:
-    case llvm::omp::Clause::OMPC_lastprivate:
-    case llvm::omp::Clause::OMPC_linear:
-    case llvm::omp::Clause::OMPC_private:
-    case llvm::omp::Clause::OMPC_reduction:
-    case llvm::omp::Clause::OMPC_task_reduction:
-      return true;
-    default:
-      return false;
-    }
-  };
-
   bool applied = applyIf(node, [&](const auto &leaf) {
     return llvm::any_of(leaf.clauses, [&](const ClauseTy *n) {
-      return canMakePrivateCopy(n->id);
+      return llvm::omp::isPrivatizingClause(n->id);
     });
   });
 
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.h b/llvm/include/llvm/Frontend/OpenMP/OMP.h
index 35dafc6d246f0..d44c33301bde7 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.h
@@ -48,6 +48,22 @@ static constexpr inline bool canHaveIterator(Clause C) {
   }
 }
 
+// Can clause C create a private copy of a variable?
+static constexpr inline bool isPrivatizingClause(Clause C) {
+  switch (C) {
+  case OMPC_firstprivate:
+  case OMPC_in_reduction:
+  case OMPC_lastprivate:
+  case OMPC_linear:
+  case OMPC_private:
+  case OMPC_reduction:
+  case OMPC_task_reduction:
+    return true;
+  default:
+    return false;
+  }
+}
+
 static constexpr unsigned FallbackVersion = 52;
 LLVM_ABI ArrayRef<unsigned> getOpenMPVersions();
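
With the "privatization" clause property now exposed as llvm::omp::isPrivatizingClause, callers outside ConstructDecompositionT can query it as well. A minimal usage sketch (hypothetical caller; only the header above is assumed):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/Frontend/OpenMP/OMP.h"

    // Hypothetical helper: does any clause in this list create a private
    // copy? Mirrors the any_of check ConstructDecompositionT now performs.
    static bool anyPrivatizing(llvm::ArrayRef<llvm::omp::Clause> Clauses) {
      return llvm::any_of(Clauses, [](llvm::omp::Clause C) {
        return llvm::omp::isPrivatizingClause(C);
      });
    }
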
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 0d22ffe63e0d6..220f84270361e 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -671,8 +671,8 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
   }
 
   if (PRFX) {
-    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
     transferImpOps(MI, PRFX, DOP);
+    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
   } else
     transferImpOps(MI, DOP, DOP);
 
@@ -1622,18 +1622,22 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
            "Non-writeback variants of STGloop / STZGloop should not "
            "survive past PrologEpilogInserter.");
   case AArch64::STR_ZZZZXI:
+  case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
     return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
   case AArch64::STR_ZZZXI:
     return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
   case AArch64::STR_ZZXI:
+  case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
     return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
   case AArch64::STR_PPXI:
     return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
   case AArch64::LDR_ZZZZXI:
+  case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
   case AArch64::LDR_ZZZXI:
     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
   case AArch64::LDR_ZZXI:
+  case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
   case AArch64::LDR_PPXI:
     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index c1474773faa76..5420545cc3cec 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2482,8 +2482,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
   case AArch64::LDR_PXI:
   case AArch64::LDR_ZXI:
   case AArch64::LDR_ZZXI:
+  case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
   case AArch64::LDR_ZZZXI:
   case AArch64::LDR_ZZZZXI:
+  case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
   case AArch64::LDRBBui:
   case AArch64::LDRBui:
   case AArch64::LDRDui:
@@ -2525,8 +2527,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
   case AArch64::STR_PXI:
   case AArch64::STR_ZXI:
   case AArch64::STR_ZZXI:
+  case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
   case AArch64::STR_ZZZXI:
   case AArch64::STR_ZZZZXI:
+  case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
   case AArch64::STRBBui:
   case AArch64::STRBui:
   case AArch64::STRDui:
@@ -4318,7 +4322,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
     break;
   // SVE
   case AArch64::STR_ZZZZXI:
+  case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
   case AArch64::LDR_ZZZZXI:
+  case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
     Scale = TypeSize::getScalable(16);
     Width = TypeSize::getScalable(16 * 4);
     MinOffset = -256;
@@ -4332,7 +4338,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
     MaxOffset = 253;
     break;
   case AArch64::STR_ZZXI:
+  case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
   case AArch64::LDR_ZZXI:
+  case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
     Scale = TypeSize::getScalable(16);
     Width = TypeSize::getScalable(16 * 2);
     MinOffset = -256;
@@ -5559,8 +5567,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Twov2d;
       Offset = false;
-    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
-               AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+    } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+             "Unexpected register store without SVE store instructions");
+      Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
+      StackID = TargetStackID::ScalableVector;
+    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
       assert(Subtarget.isSVEorStreamingSVEAvailable() &&
              "Unexpected register store without SVE store instructions");
       Opc = AArch64::STR_ZZXI;
@@ -5584,8 +5596,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Fourv2d;
       Offset = false;
-    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
-               AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+    } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+             "Unexpected register store without SVE store instructions");
+      Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
+      StackID = TargetStackID::ScalableVector;
+    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
       assert(Subtarget.isSVEorStreamingSVEAvailable() &&
              "Unexpected register store without SVE store instructions");
       Opc = AArch64::STR_ZZZZXI;
@@ -5736,8 +5752,12 @@ void AArch64InstrInfo::loadRegFromStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Twov2d;
       Offset = false;
-    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
-               AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+    } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+             "Unexpected register load without SVE load instructions");
+      Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
+      StackID = TargetStackID::ScalableVector;
+    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
       assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
       Opc = AArch64::LDR_ZZXI;
@@ -5761,8 +5781,12 @@ void AArch64InstrInfo::loadRegFromStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Fourv2d;
       Offset = false;
-    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
-               AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+    } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+             "Unexpected register load without SVE load instructions");
+      Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
+      StackID = TargetStackID::ScalableVector;
+    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
       assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
       Opc = AArch64::LDR_ZZZZXI;
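
The Scale/Width pairs above are easier to read with concrete numbers: each Z-register slot is 16 * vscale bytes, and a ZZ/ZZZZ tuple spill expands (see AArch64ExpandPseudoInsts.cpp earlier in this patch) into 2 or 4 consecutive STR_ZXI/LDR_ZXI at immediate offsets scaled by that same unit. A plain-C++ illustration under an assumed fixed vscale (not the LLVM TypeSize API):

    #include <cstdint>
    #include <cstdio>

    // Each SVE Z-register spill slot is one scalable vector: 16 * vscale
    // bytes. A ZZ or ZZZZ tuple occupies 2 or 4 consecutive scalable slots,
    // matching Scale = getScalable(16) and Width = getScalable(16 * N) above.
    static uint64_t zSlotBytes(uint64_t vscale) { return 16 * vscale; }

    int main() {
      for (uint64_t vscale : {1u, 2u, 16u}) { // 128-, 256-, 2048-bit vectors
        std::printf("vscale=%2llu: ZZ spill = %4llu bytes, ZZZZ spill = %4llu bytes\n",
                    (unsigned long long)vscale,
                    (unsigned long long)(2 * zSlotBytes(vscale)),
                    (unsigned long long)(4 * zSlotBytes(vscale)));
      }
      return 0;
    }
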
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index eddb96979f7b8..0c4b4f4c3ed88 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2625,16 +2625,22 @@ let Predicates = [HasSVE_or_SME] in {
   // These get expanded to individual LDR_ZXI/STR_ZXI instructions in
   // AArch64ExpandPseudoInsts.
   let mayLoad = 1, hasSideEffects = 0 in {
-    def LDR_ZZXI   : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def LDR_ZZXI_STRIDED_CONTIGUOUS   : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def LDR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+
+    def LDR_ZZXI   : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
     def LDR_ZZZXI  : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
-    def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
-    def LDR_PPXI   : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def LDR_PPXI   : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
   }
   let mayStore = 1, hasSideEffects = 0 in {
-    def STR_ZZXI   : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def STR_ZZXI_STRIDED_CONTIGUOUS   : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def STR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+
+    def STR_ZZXI   : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
     def STR_ZZZXI  : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
-    def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
-    def STR_PPXI   : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def STR_PPXI   : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
   }
 
   let AddedComplexity = 1 in {
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 1a293389a1b16..a6ae7f8da2121 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -270,6 +270,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
     break;
   case NeoverseV2:
   case NeoverseV3:
+    CacheLineSize = 64;
     EpilogueVectorizationMinVF = 8;
     MaxInterleaveFactor = 4;
     ScatterOverhead = 13;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 666c76b21e631..186191abe12a2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -689,10 +689,16 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) {
   if (!isShiftedMask_32(C1) || isInt<12>(C1))
     return false;
 
+  // INSBI will clobber the input register in N0. Bail out if we need a copy to
+  // preserve this value.
+  SDValue N0 = Node->getOperand(0);
+  if (!N0.hasOneUse())
+    return false;
+
   // If C1 is a shifted mask (but can't be formed as an ORI),
   // use a bitfield insert of -1.
   // Transform (or x, C1)
-  //        -> (qc.insbi x, width, shift)
+  //        -> (qc.insbi x, -1, width, shift)
   const unsigned Leading = llvm::countl_zero((uint32_t)C1);
   const unsigned Trailing = llvm::countr_zero((uint32_t)C1);
   const unsigned Width = 32 - Leading - Trailing;
@@ -705,7 +711,7 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) {
   SDLoc DL(Node);
   MVT VT = Node->getSimpleValueType(0);
 
-  SDValue Ops[] = {CurDAG->getSignedTargetConstant(-1, DL, VT),
+  SDValue Ops[] = {N0, CurDAG->getSignedTargetConstant(-1, DL, VT),
                    CurDAG->getTargetConstant(Width, DL, VT),
                    CurDAG->getTargetConstant(Trailing, DL, VT)};
   SDNode *BitIns = CurDAG->getMachineNode(RISCV::QC_INSBI, DL, VT, Ops);
@@ -2936,8 +2942,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
 /// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
 bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base,
                                           SDValue &Offset) {
-  if (SelectAddrFrameIndex(Addr, Base, Offset))
-    return true;
+  // FIXME: Support FrameIndex. Need to teach eliminateFrameIndex that only
+  // a 9-bit immediate can be folded.
 
   SDLoc DL(Addr);
   MVT VT = Addr.getSimpleValueType();
@@ -2947,8 +2953,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base,
     if (isUInt<9>(CVal)) {
       Base = Addr.getOperand(0);
 
-      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
-        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
+      // FIXME: Support FrameIndex. Need to teach eliminateFrameIndex that only
+      // a 9-bit immediate can be folded.
       Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
       return true;
     }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 26bb1e8d17857..c7cb6e237aeac 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -845,10 +845,11 @@ let Predicates = [HasVendorXqcibi, IsRV32] in {
 let Predicates = [HasVendorXqcibm, IsRV32] in {
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
   def QC_INSBRI : QCIRVInstRI<0b1, simm11, "qc.insbri">;
-  def QC_INSBI : RVInstIBase<0b001, OPC_CUSTOM_0, (outs GPRNoX0:$rd),
-                             (ins simm5:$imm5, uimm5_plus1:$width,
+  def QC_INSBI : RVInstIBase<0b001, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb),
+                             (ins GPRNoX0:$rd, simm5:$imm5, uimm5_plus1:$width,
                               uimm5:$shamt), "qc.insbi",
                              "$rd, $imm5, $width, $shamt"> {
+    let Constraints = "$rd = $rd_wb";
     bits<5> imm5;
     bits<5> shamt;
     bits<5> width;
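
Taken together, the two Xqcibm changes model qc.insbi as a read-modify-write instruction: the tied $rd = $rd_wb constraint turns the destination into an input, and the ISel change above passes N0 through and bails out when the input has other uses. A plain-C++ model of the semantics as I read the transform comment (an assumption for illustration, not the vendor spec text):

    #include <cassert>
    #include <cstdint>

    // Model: insert the low `width` bits of `imm5` into `rd` at bit `shift`,
    // leaving the other bits of `rd` unchanged (hence the old rd is clobbered).
    static uint32_t qcInsbi(uint32_t rd, int32_t imm5, unsigned width,
                            unsigned shift) {
      uint32_t mask = (width == 32 ? ~0u : ((1u << width) - 1u)) << shift;
      return (rd & ~mask) | (((uint32_t)imm5 << shift) & mask);
    }

    int main() {
      // With imm5 = -1 the field becomes all ones, so for a shifted mask C1:
      // (or x, C1) == qc.insbi(x, -1, width, trailing_zeros(C1)).
      uint32_t x = 0x12340042;
      assert(qcInsbi(x, -1, 16, 0) == (x | 0xFFFFu));  // test_insbi_mask_mv below
      assert(qcInsbi(x, -1, 4, 12) == (x | 0xF000u));  // the shifted-mask case
      return 0;
    }
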
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
index 83c9b73c57570..2b16dd0f29ecc 100644
--- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -1,5 +1,5 @@
-# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy %s -o - | FileCheck %s
-# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND
+# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s --check-prefix=EXPAND
 --- |
   ; ModuleID = '<stdin>'
   source_filename = "<stdin>"
@@ -14,13 +14,14 @@
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_virtreg_ppr_to_pnr() #1 { entry: unreachable }
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable }
-  define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #0 { entry: unreachable }
+  define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #2 { entry: unreachable }
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable }
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable }
-  define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #0 { entry: unreachable }
+  define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #2 { entry: unreachable }
 
   attributes #0 = { nounwind "target-features"="+sve" }
   attributes #1 = { nounwind "target-features"="+sve2p1" }
+  attributes #2 = { nounwind "target-features"="+sve,+sme2" "aarch64_pstate_sm_enabled" }
 ...
 ---
@@ -318,10 +319,10 @@ registers:
   - { id: 0, class: zpr2 }
 stack:
 liveins:
-  - { reg: '$z0_z1', virtual-reg: '%0' }
+  - { reg: '$z1_z2', virtual-reg: '%0' }
 body: |
   bb.0.entry:
-    liveins: $z0_z1
+    liveins: $z1_z2
 
     ; CHECK-LABEL: name: spills_fills_stack_id_zpr2
     ; CHECK: stack:
@@ -329,12 +330,12 @@ body: |
     ; CHECK-NEXT: stack-id: scalable-vector
 
     ; EXPAND-LABEL: name: spills_fills_stack_id_zpr2
-    ; EXPAND: STR_ZXI $z0, $sp, 0
-    ; EXPAND: STR_ZXI $z1, $sp, 1
-    ; EXPAND: $z0 = LDR_ZXI $sp, 0
-    ; EXPAND: $z1 = LDR_ZXI $sp, 1
+    ; EXPAND: STR_ZXI $z1, $sp, 0
+    ; EXPAND: STR_ZXI $z2, $sp, 1
+    ; EXPAND: $z1 = LDR_ZXI $sp, 0
+    ; EXPAND: $z2 = LDR_ZXI $sp, 1
 
-    %0:zpr2 = COPY $z0_z1
+    %0:zpr2 = COPY $z1_z2
 
     $z0_z1_z2_z3 = IMPLICIT_DEF
     $z4_z5_z6_z7 = IMPLICIT_DEF
@@ -345,7 +346,7 @@ body: |
     $z24_z25_z26_z27 = IMPLICIT_DEF
     $z28_z29_z30_z31 = IMPLICIT_DEF
 
-    $z0_z1 = COPY %0
+    $z1_z2 = COPY %0
     RET_ReallyLR
 ...
 ---
@@ -439,10 +440,10 @@ registers:
   - { id: 0, class: zpr4 }
 stack:
 liveins:
-  - { reg: '$z0_z1_z2_z3', virtual-reg: '%0' }
+  - { reg: '$z1_z2_z3_z4', virtual-reg: '%0' }
 body: |
   bb.0.entry:
-    liveins: $z0_z1_z2_z3
+    liveins: $z1_z2_z3_z4
 
     ; CHECK-LABEL: name: spills_fills_stack_id_zpr4
     ; CHECK: stack:
@@ -450,16 +451,16 @@ body: |
     ; CHECK-NEXT: stack-id: scalable-vector
 
     ; EXPAND-LABEL: name: spills_fills_stack_id_zpr4
-    ; EXPAND: STR_ZXI $z0, $sp, 0
-    ; EXPAND: STR_ZXI $z1, $sp, 1
-    ; EXPAND: STR_ZXI $z2, $sp, 2
-    ; EXPAND: STR_ZXI $z3, $sp, 3
-    ; EXPAND: $z0 = LDR_ZXI $sp, 0
-    ; EXPAND: $z1 = LDR_ZXI $sp, 1
-    ; EXPAND: $z2 = LDR_ZXI $sp, 2
-    ; EXPAND: $z3 = LDR_ZXI $sp, 3
+    ; EXPAND: STR_ZXI $z1, $sp, 0
+    ; EXPAND: STR_ZXI $z2, $sp, 1
+    ; EXPAND: STR_ZXI $z3, $sp, 2
+    ; EXPAND: STR_ZXI $z4, $sp, 3
+    ; EXPAND: $z1 = LDR_ZXI $sp, 0
+    ; EXPAND: $z2 = LDR_ZXI $sp, 1
+    ; EXPAND: $z3 = LDR_ZXI $sp, 2
+    ; EXPAND: $z4 = LDR_ZXI $sp, 3
 
-    %0:zpr4 = COPY $z0_z1_z2_z3
+    %0:zpr4 = COPY $z1_z2_z3_z4
 
     $z0_z1_z2_z3 = IMPLICIT_DEF
     $z4_z5_z6_z7 = IMPLICIT_DEF
@@ -470,7 +471,7 @@ body: |
     $z24_z25_z26_z27 = IMPLICIT_DEF
     $z28_z29_z30_z31 = IMPLICIT_DEF
 
-    $z0_z1_z2_z3 = COPY %0
+    $z1_z2_z3_z4 = COPY %0
     RET_ReallyLR
 ...
 ---
diff --git a/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir b/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir
index ae70f91a4ec64..a1d615c910792 100644
--- a/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir
+++ b/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir
@@ -12,7 +12,7 @@ body: |
   bb.0:
     liveins: $p0, $z0
 
-    ; CHECK: add_x
+    ; CHECK: name: add_x
     ; CHECK-NOT: MOVPRFX
     ; CHECK: $z0 = FADD_ZPmZ_S renamable $p0, killed $z0, renamable $z0
     ; CHECK-NEXT: RET
@@ -21,22 +21,36 @@ body: |
 
 ...
 
-# CHECK: {{.*}} MSB_ZPmZZ_B {{.*}}
 ---
 name: expand_mls_to_msb
 body: |
   bb.0:
+    ; CHECK: name: expand_mls_to_msb
+    ; CHECK: {{.*}} MSB_ZPmZZ_B {{.*}}
     renamable $p0 = PTRUE_B 31, implicit $vg
     renamable $z0 = MLS_ZPZZZ_B_UNDEF killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1
     RET_ReallyLR implicit $z0
 ...
 
-# CHECK: {{.*}} MAD_ZPmZZ_B {{.*}}
 ---
 name: expand_mla_to_mad
 body: |
   bb.0:
+    ; CHECK: name: expand_mla_to_mad
+    ; CHECK: {{.*}} MAD_ZPmZZ_B {{.*}}
     renamable $p0 = PTRUE_B 31, implicit $vg
     renamable $z0 = MLA_ZPZZZ_B_UNDEF killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1
     RET_ReallyLR implicit $z0
 ...
+
+---
+name: expand_transfer_implicit_defs
+body: |
+  bb.0:
+    ; CHECK: name: expand_transfer_implicit_defs
+    ; CHECK: BUNDLE
+    ; CHECK-SAME: implicit-def $z0_z1_z2_z3
+    liveins: $z1, $z2, $p0
+    renamable $z0 = FADD_ZPZZ_D_UNDEF killed $p0, killed $z1, killed $z2, implicit-def $z0_z1_z2_z3
+    RET_ReallyLR implicit $z0_z1_z2_z3
+...
diff --git a/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll b/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll
index f227fa9aa423d..2fa06517508ce 100644
--- a/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll
+++ b/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll
@@ -105,6 +105,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ;
 ; RV32ZBBXQCIBM-LABEL: test_cttz_i16:
 ; RV32ZBBXQCIBM:       # %bb.0:
+; RV32ZBBXQCIBM-NEXT:    not a0, a0
 ; RV32ZBBXQCIBM-NEXT:    qc.insbi a0, -1, 1, 16
 ; RV32ZBBXQCIBM-NEXT:    ctz a0, a0
 ; RV32ZBBXQCIBM-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/xqcibm-insert.ll b/llvm/test/CodeGen/RISCV/xqcibm-insert.ll
index 6b7f9ae856625..88054a691bad1 100644
--- a/llvm/test/CodeGen/RISCV/xqcibm-insert.ll
+++ b/llvm/test/CodeGen/RISCV/xqcibm-insert.ll
@@ -47,6 +47,29 @@ define i32 @test_insbi_mask(i32 %a) nounwind {
   ret i32 %or
 }
 
+define i32 @test_insbi_mask_mv(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: test_insbi_mask_mv:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a0, 16
+; RV32I-NEXT:    addi a0, a0, -1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IXQCIBM-LABEL: test_insbi_mask_mv:
+; RV32IXQCIBM:       # %bb.0:
+; RV32IXQCIBM-NEXT:    mv a0, a1
+; RV32IXQCIBM-NEXT:    qc.insbi a0, -1, 16, 0
+; RV32IXQCIBM-NEXT:    ret
+;
+; RV32IXQCIBMZBS-LABEL: test_insbi_mask_mv:
+; RV32IXQCIBMZBS:       # %bb.0:
+; RV32IXQCIBMZBS-NEXT:    mv a0, a1
+; RV32IXQCIBMZBS-NEXT:    qc.insbi a0, -1, 16, 0
+; RV32IXQCIBMZBS-NEXT:    ret
+  %or = or i32 %b, 65535
+  ret i32 %or
+}
+
 define i32 @test_insbi_shifted_mask(i32 %a) nounwind {
 ; RV32I-LABEL: test_insbi_shifted_mask:
 ; RV32I:       # %bb.0:
@@ -67,6 +90,36 @@ define i32 @test_insbi_shifted_mask(i32 %a) nounwind {
   ret i32 %or
 }
 
+define i32 @test_insbi_shifted_mask_multiple_uses(i32 %a) nounwind {
+; RV32I-LABEL: test_insbi_shifted_mask_multiple_uses:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 15
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    addi a0, a0, 10
+; RV32I-NEXT:    xor a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IXQCIBM-LABEL: test_insbi_shifted_mask_multiple_uses:
+; RV32IXQCIBM:       # %bb.0:
+; RV32IXQCIBM-NEXT:    lui a1, 15
+; RV32IXQCIBM-NEXT:    or a1, a1, a0
+; RV32IXQCIBM-NEXT:    addi a0, a0, 10
+; RV32IXQCIBM-NEXT:    xor a0, a0, a1
+; RV32IXQCIBM-NEXT:    ret
+;
+; RV32IXQCIBMZBS-LABEL: test_insbi_shifted_mask_multiple_uses:
+; RV32IXQCIBMZBS:       # %bb.0:
+; RV32IXQCIBMZBS-NEXT:    lui a1, 15
+; RV32IXQCIBMZBS-NEXT:    or a1, a1, a0
+; RV32IXQCIBMZBS-NEXT:    addi a0, a0, 10
+; RV32IXQCIBMZBS-NEXT:    xor a0, a0, a1
+; RV32IXQCIBMZBS-NEXT:    ret
+  %or = or i32 %a, 61440
+  %add = add i32 %a, 10
+  %xor = xor i32 %or, %add
+  ret i32 %xor
+}
+
 define i32 @test_single_bit_set(i32 %a) nounwind {
 ; RV32I-LABEL: test_single_bit_set:
 ; RV32I:       # %bb.0:
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index 2b1a9076afe4a..ece4106de4aca 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 21
-llvm_version_minor = 0
+llvm_version_minor = 1
 llvm_version_patch = 0
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index b5aa8edc03dc7..520ff22dc6fb0 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = "Daniel Dunbar"
 __email__ = "daniel@minormatter.com"
-__versioninfo__ = (21, 0, 0)
+__versioninfo__ = (21, 1, 0)
 __version__ = ".".join(str(v) for v in __versioninfo__) + "dev"
 
 __all__ = []
diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py
index d3369abae70b9..03eee0028b3cc 100644
--- a/llvm/utils/mlgo-utils/mlgo/__init__.py
+++ b/llvm/utils/mlgo-utils/mlgo/__init__.py
@@ -4,7 +4,7 @@
 
 from datetime import timezone, datetime
 
-__versioninfo__ = (20, 0, 0)
+__versioninfo__ = (21, 1, 0)
 __version__ = (
     ".".join(str(v) for v in __versioninfo__)
     + "dev"
diff --git a/mlir/include/mlir/Analysis/SliceAnalysis.h b/mlir/include/mlir/Analysis/SliceAnalysis.h
index d082d2d9f758b..18349d071bb2e 100644
--- a/mlir/include/mlir/Analysis/SliceAnalysis.h
+++ b/mlir/include/mlir/Analysis/SliceAnalysis.h
@@ -65,8 +65,9 @@ using ForwardSliceOptions = SliceOptions;
 ///
 /// The implementation traverses the use chains in postorder traversal for
 /// efficiency reasons: if an operation is already in `forwardSlice`, no
-/// need to traverse its uses again. Since use-def chains form a DAG, this
-/// terminates.
+/// need to traverse its uses again. In the presence of use-def cycles in a
+/// graph region, the traversal stops at the first operation that was already
+/// visited (which is not added to the slice anymore).
 ///
 /// Upon return to the root call, `forwardSlice` is filled with a
 /// postorder list of uses (i.e. a reverse topological order). To get a proper
@@ -114,8 +115,9 @@ void getForwardSlice(Value root, SetVector<Operation *> *forwardSlice,
 ///
 /// The implementation traverses the def chains in postorder traversal for
 /// efficiency reasons: if an operation is already in `backwardSlice`, no
-/// need to traverse its definitions again. Since useuse-def chains form a DAG,
-/// this terminates.
+/// need to traverse its definitions again. In the presence of use-def cycles
+/// in a graph region, the traversal stops at the first operation that was
+/// already visited (which is not added to the slice anymore).
 ///
 /// Upon return to the root call, `backwardSlice` is filled with a
 /// postorder list of defs. This happens to be a topological order, from the
diff --git a/mlir/lib/Analysis/SliceAnalysis.cpp b/mlir/lib/Analysis/SliceAnalysis.cpp
index 36a9812bd7972..991c71e3f689a 100644
--- a/mlir/lib/Analysis/SliceAnalysis.cpp
+++ b/mlir/lib/Analysis/SliceAnalysis.cpp
@@ -26,7 +26,8 @@ using namespace mlir;
 
 static void
-getForwardSliceImpl(Operation *op, SetVector<Operation *> *forwardSlice,
+getForwardSliceImpl(Operation *op, DenseSet<Operation *> &visited,
+                    SetVector<Operation *> *forwardSlice,
                     const SliceOptions::TransitiveFilter &filter = nullptr) {
   if (!op)
     return;
@@ -40,20 +41,41 @@ getForwardSliceImpl(Operation *op, SetVector<Operation *> *forwardSlice,
   for (Region &region : op->getRegions())
     for (Block &block : region)
       for (Operation &blockOp : block)
-        if (forwardSlice->count(&blockOp) == 0)
-          getForwardSliceImpl(&blockOp, forwardSlice, filter);
-  for (Value result : op->getResults()) {
-    for (Operation *userOp : result.getUsers())
-      if (forwardSlice->count(userOp) == 0)
-        getForwardSliceImpl(userOp, forwardSlice, filter);
-  }
+        if (forwardSlice->count(&blockOp) == 0) {
+          // We don't have to check if the 'blockOp' is already visited because
+          // there cannot be a traversal path from this nested op to the parent
+          // and thus a cycle cannot be closed here. We still have to mark it
+          // as visited to stop before visiting this operation again if it is
+          // part of a cycle.
+          visited.insert(&blockOp);
+          getForwardSliceImpl(&blockOp, visited, forwardSlice, filter);
+          visited.erase(&blockOp);
+        }
+
+  for (Value result : op->getResults())
+    for (Operation *userOp : result.getUsers()) {
+      // A cycle can only occur within a basic block (not across regions or
+      // basic blocks) because the parent region must be a graph region, graph
+      // regions are restricted to always have 0 or 1 blocks, and there cannot
+      // be a def-use edge from a nested operation to an operation in an
+      // ancestor region. Therefore, we don't have to but may use the same
+      // 'visited' set across regions/blocks as long as we remove operations
+      // from the set again when the DFS traverses back from the leaf to the
+      // root.
+      if (forwardSlice->count(userOp) == 0 && visited.insert(userOp).second)
+        getForwardSliceImpl(userOp, visited, forwardSlice, filter);
+
+      visited.erase(userOp);
+    }
 
   forwardSlice->insert(op);
 }
 
 void mlir::getForwardSlice(Operation *op, SetVector<Operation *> *forwardSlice,
                            const ForwardSliceOptions &options) {
-  getForwardSliceImpl(op, forwardSlice, options.filter);
+  DenseSet<Operation *> visited;
+  visited.insert(op);
+  getForwardSliceImpl(op, visited, forwardSlice, options.filter);
   if (!options.inclusive) {
     // Don't insert the top level operation, we just queried on it and don't
     // want it in the results.
@@ -69,8 +91,12 @@ void mlir::getForwardSlice(Operation *op, SetVector<Operation *> *forwardSlice,
 
 void mlir::getForwardSlice(Value root, SetVector<Operation *> *forwardSlice,
                            const SliceOptions &options) {
-  for (Operation *user : root.getUsers())
-    getForwardSliceImpl(user, forwardSlice, options.filter);
+  DenseSet<Operation *> visited;
+  for (Operation *user : root.getUsers()) {
+    visited.insert(user);
+    getForwardSliceImpl(user, visited, forwardSlice, options.filter);
+    visited.erase(user);
+  }
 
   // Reverse to get back the actual topological order.
   // std::reverse does not work out of the box on SetVector and I want an
@@ -80,6 +106,7 @@ void mlir::getForwardSlice(Value root, SetVector<Operation *> *forwardSlice,
 }
 
 static LogicalResult getBackwardSliceImpl(Operation *op,
+                                          DenseSet<Operation *> &visited,
                                           SetVector<Operation *> *backwardSlice,
                                           const BackwardSliceOptions &options) {
   if (!op || op->hasTrait<OpTrait::IsIsolatedFromAbove>())
     return failure();
@@ -93,8 +120,12 @@ static LogicalResult getBackwardSliceImpl(Operation *op,
 
   auto processValue = [&](Value value) {
     if (auto *definingOp = value.getDefiningOp()) {
-      if (backwardSlice->count(definingOp) == 0)
-        return getBackwardSliceImpl(definingOp, backwardSlice, options);
+      if (backwardSlice->count(definingOp) == 0 &&
+          visited.insert(definingOp).second)
+        return getBackwardSliceImpl(definingOp, visited, backwardSlice,
+                                    options);
+
+      visited.erase(definingOp);
     } else if (auto blockArg = dyn_cast<BlockArgument>(value)) {
       if (options.omitBlockArguments)
         return success();
@@ -107,7 +138,8 @@ static LogicalResult getBackwardSliceImpl(Operation *op,
       if (parentOp && backwardSlice->count(parentOp) == 0) {
         if (parentOp->getNumRegions() == 1 &&
             llvm::hasSingleElement(parentOp->getRegion(0).getBlocks())) {
-          return getBackwardSliceImpl(parentOp, backwardSlice, options);
+          return getBackwardSliceImpl(parentOp, visited, backwardSlice,
+                                      options);
         }
       }
     } else {
@@ -145,7 +177,10 @@ static LogicalResult getBackwardSliceImpl(Operation *op,
 LogicalResult mlir::getBackwardSlice(Operation *op,
                                      SetVector<Operation *> *backwardSlice,
                                      const BackwardSliceOptions &options) {
-  LogicalResult result = getBackwardSliceImpl(op, backwardSlice, options);
+  DenseSet<Operation *> visited;
+  visited.insert(op);
+  LogicalResult result =
+      getBackwardSliceImpl(op, visited, backwardSlice, options);
 
   if (!options.inclusive) {
     // Don't insert the top level operation, we just queried on it and don't
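
The invariant is the same in both traversals: `visited` holds only the operations on the active DFS path (plus the root), so a use-def cycle is cut exactly once, while operations reachable along several acyclic paths can still be expanded from each of them. A standalone sketch of that pattern on a generic graph (assumed names, not the MLIR API):

    #include <map>
    #include <set>
    #include <vector>

    using Graph = std::map<int, std::vector<int>>;

    // Post-order DFS that tolerates cycles: `onPath` mirrors the patch's
    // `visited` set (insert on descent, erase on return) and `inSlice`
    // mirrors `forwardSlice` (finished nodes are never re-expanded).
    static void dfs(int node, const Graph &g, std::set<int> &onPath,
                    std::set<int> &inSlice, std::vector<int> &postorder) {
      if (auto it = g.find(node); it != g.end()) {
        for (int succ : it->second) {
          // Descend only if `succ` is neither finished nor on the active
          // path; the second condition terminates the walk on a cycle.
          if (!inSlice.count(succ) && onPath.insert(succ).second)
            dfs(succ, g, onPath, inSlice, postorder);
          onPath.erase(succ);
        }
      }
      if (inSlice.insert(node).second)
        postorder.push_back(node); // like forwardSlice->insert(op)
    }

    int main() {
      Graph g = {{0, {1}}, {1, {0}}}; // the two-op cycle from the test below
      std::set<int> onPath = {0}, inSlice;
      std::vector<int> postorder;
      dfs(0, g, onPath, inSlice, postorder);
      return postorder.size() == 2 ? 0 : 1; // both ops end up in the slice
    }
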
diff --git a/mlir/test/Dialect/Affine/slicing-utils.mlir b/mlir/test/Dialect/Affine/slicing-utils.mlir
index 0848a924b9d96..c53667a98cfbe 100644
--- a/mlir/test/Dialect/Affine/slicing-utils.mlir
+++ b/mlir/test/Dialect/Affine/slicing-utils.mlir
@@ -292,3 +292,26 @@ func.func @slicing_test_multiple_return(%arg0: index) -> (index, index) {
   %0:2 = "slicing-test-op"(%arg0, %arg0): (index, index) -> (index, index)
   return %0#0, %0#1 : index, index
 }
+
+// -----
+
+// FWD-LABEL: graph_region_with_cycle
+// BWD-LABEL: graph_region_with_cycle
+// FWDBWD-LABEL: graph_region_with_cycle
+func.func @graph_region_with_cycle() {
+  test.isolated_graph_region {
+    // FWD: matched: [[V0:%.+]] = "slicing-test-op"([[V1:%.+]]) : (i1) -> i1 forward static slice:
+    // FWD: [[V1]] = "slicing-test-op"([[V0]]) : (i1) -> i1
+    // FWD: matched: [[V1]] = "slicing-test-op"([[V0]]) : (i1) -> i1 forward static slice:
+    // FWD: [[V0]] = "slicing-test-op"([[V1]]) : (i1) -> i1
+
+    // BWD: matched: [[V0:%.+]] = "slicing-test-op"([[V1:%.+]]) : (i1) -> i1 backward static slice:
+    // BWD: [[V1]] = "slicing-test-op"([[V0]]) : (i1) -> i1
+    // BWD: matched: [[V1]] = "slicing-test-op"([[V0]]) : (i1) -> i1 backward static slice:
+    // BWD: [[V0]] = "slicing-test-op"([[V1]]) : (i1) -> i1
+    %0 = "slicing-test-op"(%1) : (i1) -> i1
+    %1 = "slicing-test-op"(%0) : (i1) -> i1
+  }
+
+  return
+}