-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[DirectX] Support ConstExpr GEPs #150082
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DirectX] Support ConstExpr GEPs #150082
Conversation
farzonl
commented
Jul 22, 2025
- Fixes [DirectX] DXILDataScalarization.cpp is leaving LLVM IR in a broken state where globals are deleted when they still have const uses. #150050
- Address the issue of many nested geps
- Check for ConstantExpr GEP if we see it check if it needs a global replacement with a new type. Create the new constExpr Gep and set the pointer operand to it. Finally cleanup and remove the old nested geps.
- Fixes llvm#150050 - Address the issue of many nested geps - Check for ConstantExpr GEP if we see it check if it needs a global replacement with a new type. Create the new constExpr Gep and set the pointer operand to it. Finally cleanup and remove the old nested geps.
@llvm/pr-subscribers-backend-directx Author: Farzon Lotfi (farzonl) Changes
Full diff: https://github.com/llvm/llvm-project/pull/150082.diff 2 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index d9d9b36d0b739..5ec55a6718b9a 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -300,11 +300,84 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
return replaceDynamicExtractElementInst(EEI);
}
+static void buildConstExprGEPChain(GetElementPtrInst &GEPI, Value *CurrentPtr,
+ SmallVector<ConstantExpr *, 4> &GEPChain,
+ IRBuilder<> &Builder) {
+ // Process the rest of the chain in reverse order (skipping the innermost)
+ for (int I = GEPChain.size() - 2; I >= 0; I--) {
+ ConstantExpr *CE = GEPChain[I];
+ GetElementPtrInst *GEPInst =
+ cast<GetElementPtrInst>(CE->getAsInstruction());
+ GEPInst->insertBefore(GEPI.getIterator());
+ SmallVector<Value *, MaxVecSize> CurrIndices(GEPInst->indices());
+
+ // Create a new GEP instruction
+ Type *SourceTy = GEPInst->getSourceElementType();
+ CurrentPtr =
+ Builder.CreateGEP(SourceTy, CurrentPtr, CurrIndices, GEPInst->getName(),
+ GEPInst->getNoWrapFlags());
+
+ // If this is the outermost GEP, update the main GEPI
+ if (I == 0) {
+ GEPI.setOperand(GEPI.getPointerOperandIndex(), CurrentPtr);
+ }
+
+ // Clean up the temporary instruction
+ GEPInst->eraseFromParent();
+ }
+}
+
bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
Value *PtrOperand = GEPI.getPointerOperand();
Type *OrigGEPType = GEPI.getSourceElementType();
Type *NewGEPType = OrigGEPType;
bool NeedsTransform = false;
+ // Check if the pointer operand is a ConstantExpr GEP
+ if (auto *PtrOpGEPCE = dyn_cast<ConstantExpr>(PtrOperand);
+ PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) {
+
+ // Collect all nested GEPs in the chain
+ SmallVector<ConstantExpr *, 4> GEPChain;
+ Value *BasePointer = PtrOpGEPCE->getOperand(0);
+ GEPChain.push_back(PtrOpGEPCE);
+
+ // Walk up the chain to find all nested GEPs and the base pointer
+ while (auto *NextGEP = dyn_cast<ConstantExpr>(BasePointer)) {
+ if (NextGEP->getOpcode() != Instruction::GetElementPtr)
+ break;
+
+ GEPChain.push_back(NextGEP);
+ BasePointer = NextGEP->getOperand(0);
+ }
+
+ // Check if the base pointer is a global that needs replacement
+ if (GlobalVariable *NewGlobal = lookupReplacementGlobal(BasePointer)) {
+ IRBuilder<> Builder(&GEPI);
+
+ // Create a new GEP for the innermost GEP (last in the chain)
+ ConstantExpr *InnerGEPCE = GEPChain.back();
+ GetElementPtrInst *InnerGEP =
+ cast<GetElementPtrInst>(InnerGEPCE->getAsInstruction());
+ InnerGEP->insertBefore(GEPI.getIterator());
+
+ SmallVector<Value *, MaxVecSize> Indices(InnerGEP->indices());
+ Type *NewGEPType = NewGlobal->getValueType();
+ Value *NewInnerGEP =
+ Builder.CreateGEP(NewGEPType, NewGlobal, Indices, InnerGEP->getName(),
+ InnerGEP->getNoWrapFlags());
+
+ // If there's only one GEP in the chain, update the main GEPI directly
+ if (GEPChain.size() == 1)
+ GEPI.setOperand(GEPI.getPointerOperandIndex(), NewInnerGEP);
+ else
+ // For multiple GEPs, we need to create a chain of GEPs
+ buildConstExprGEPChain(GEPI, NewInnerGEP, GEPChain, Builder);
+
+ // Clean up the innermost GEP
+ InnerGEP->eraseFromParent();
+ return true;
+ }
+ }
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
NewGEPType = NewGlobal->getValueType();
diff --git a/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll
new file mode 100644
index 0000000000000..156a8e7c5c386
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll
@@ -0,0 +1,80 @@
+; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.4-library %s | FileCheck %s --check-prefixes=SCHECK,CHECK
+; RUN: opt -S -passes='dxil-data-scalarization,function(scalarizer<load-store>),dxil-flatten-arrays' -mtriple=dxil-pc-shadermodel6.4-library %s | FileCheck %s --check-prefixes=FCHECK,CHECK
+
+@aTile = hidden addrspace(3) global [10 x [10 x <4 x i32>]] zeroinitializer, align 16
+@bTile = hidden addrspace(3) global [10 x [10 x i32]] zeroinitializer, align 16
+@cTile = internal global [2 x [2 x <2 x i32>]] zeroinitializer, align 16
+@dTile = internal global [2 x [2 x [2 x <2 x i32>]]] zeroinitializer, align 16
+
+define void @CSMain() {
+; CHECK-LABEL: define void @CSMain() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE:%.*]] = alloca [4 x i32], align 16
+;
+; SCHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1), i32 0, i32 2
+; SCHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(3) [[TMP0]], align 16
+; SCHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
+;
+; FCHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE_I14:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 1
+; FCHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE_I25:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 2
+; FCHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE_I36:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 3
+; FCHECK-NEXT: [[DOTI07:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 48), align 16
+; FCHECK-NEXT: [[DOTI119:%.*]] = load i32, ptr addrspace(3) getelementptr ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 49), align 4
+; FCHECK-NEXT: [[DOTI2211:%.*]] = load i32, ptr addrspace(3) getelementptr ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 50), align 8
+; FCHECK-NEXT: [[DOTI3313:%.*]] = load i32, ptr addrspace(3) getelementptr ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 51), align 4
+; FCHECK-NEXT: store i32 [[DOTI07]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
+; FCHECK-NEXT: store i32 [[DOTI119]], ptr [[AFRAGPACKED_I_SCALARIZE_I14]], align 4
+; FCHECK-NEXT: store i32 [[DOTI2211]], ptr [[AFRAGPACKED_I_SCALARIZE_I25]], align 8
+; FCHECK-NEXT: store i32 [[DOTI3313]], ptr [[AFRAGPACKED_I_SCALARIZE_I36]], align 4
+;
+; CHECK-NEXT: ret void
+entry:
+ %aFragPacked.i = alloca <4 x i32>, align 16
+ %0 = load <4 x i32>, ptr addrspace(3) getelementptr inbounds ([10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x <4 x i32>]], ptr addrspace(3) @aTile, i32 0, i32 1), i32 0, i32 2), align 16
+ store <4 x i32> %0, ptr %aFragPacked.i, align 16
+ ret void
+}
+
+define void @Main() {
+; CHECK-LABEL: define void @Main() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[BFRAGPACKED_I:%.*]] = alloca i32, align 16
+;
+; SCHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1
+; SCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[TMP0]], align 16
+; SCHECK-NEXT: store i32 [[TMP1]], ptr [[BFRAGPACKED_I]], align 16
+;
+; FCHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16
+; FCHECK-NEXT: store i32 [[TMP0]], ptr [[BFRAGPACKED_I]], align 16
+;
+; CHECK-NEXT: ret void
+entry:
+ %bFragPacked.i = alloca i32, align 16
+ %0 = load i32, ptr addrspace(3) getelementptr inbounds ([10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1), align 16
+ store i32 %0, ptr %bFragPacked.i, align 16
+ ret void
+}
+
+define void @global_nested_geps_3d() {
+; CHECK-LABEL: define void @global_nested_geps_3d() {
+; SCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1
+; SCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+;
+; FCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4
+;
+; CHECK-NEXT: ret void
+ %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* @cTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4
+ ret void
+}
+
+define void @global_nested_geps_4d() {
+; CHECK-LABEL: define void @global_nested_geps_4d() {
+; SCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x <2 x i32>]], ptr getelementptr inbounds ([2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1
+; SCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+;
+; FCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4
+;
+; CHECK-NEXT: ret void
+ %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* getelementptr inbounds ([2 x [2 x [2 x <2 x i32>]]], [2 x [2 x [2 x <2 x i32>]]]* @dTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4
+ ret void
+}
|
Build failure in:
Is unrelated to this change. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Curious question, is it possible for the innermost GEP to be a constexpr, but the outer GEPs not to be?
Is that a worthy test case?
Not sure what you mean exactly, but a ConstantExpr GEP's pointer operand can only be a Global Variable or another ConstantExpr GEP. A (virtual) register is not allowed in a ConstantExpr. |
Specifically, I mean for this code block:
Is it possible for a situation to happen so that the dyn_cast fails, but the opcode is indeed a GetElementPtr. |
It is possible for that dyn_cast to fail, but if it does fail then that GEP must be an instruction |
Perhaps you will prefer this implementation? I find it simpler and it avoids creating a bunch of temporary instructions. bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
GEPOperator *GOp = cast<GEPOperator>(&GEPI);
Value *PtrOperand = GOp->getPointerOperand();
Type *NewGEPType = GOp->getSourceElementType();
bool NeedsTransform = false;
// Walk the chain of GEP ConstantExprs to obtain the root GEP
while (auto *PtrOpGEPCE = dyn_cast<ConstantExpr>(PtrOperand)) {
if (auto *PtrOpGOp = dyn_cast<GEPOperator>(PtrOpGEPCE)) {
GOp = PtrOpGOp;
PtrOperand = GOp->getPointerOperand();
NewGEPType = GOp->getSourceElementType();
} else
break;
}
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
PtrOperand = NewGlobal;
NewGEPType = NewGlobal->getValueType();
NeedsTransform = true;
} else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) {
Type *AllocatedType = Alloca->getAllocatedType();
// Only transform if the allocated type is an array
if (AllocatedType != GOp->getResultElementType() && isa<ArrayType>(AllocatedType)) {
NewGEPType = AllocatedType;
NeedsTransform = true;
}
}
// Scalar geps should remain scalars geps. The dxil-flatten-arrays pass will
// convert these scalar geps into flattened array geps
if (!isa<ArrayType>(GOp->getSourceElementType()))
NewGEPType = GOp->getSourceElementType();
// Note: We bail if this isn't a gep touched via alloca or global
// transformations
if (!NeedsTransform)
return false;
IRBuilder<> Builder(&GEPI);
SmallVector<Value *, 4> Indices(GOp->indices());
Value *NewGEP =
Builder.CreateGEP(NewGEPType, PtrOperand, Indices,
GOp->getName(), GOp->getNoWrapFlags());
GOp->replaceAllUsesWith(NewGEP);
if (ConstantExpr *GOpCE = dyn_cast<ConstantExpr>(GOp))
GOpCE->destroyConstant();
else if (GetElementPtrInst *GOpGEPI = dyn_cast<GetElementPtrInst>(GOp))
GOpGEPI->eraseFromParent(); // In this case, GOp must be GEPI
return true;
} It passes all the directx codegen tests, including the new test you added in this PR. |
e6acd09
to
2c0e73c
Compare