Skip to content

[DirectX] Support ConstExpr GEPs #150082

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 23, 2025
Merged

Conversation

farzonl
Copy link
Member

@farzonl farzonl commented Jul 22, 2025

- Fixes llvm#150050
- Address the issue of many nested geps
- Check for ConstantExpr GEP if we see it check if it needs a global
  replacement with a new type. Create the new constExpr Gep and set
  the pointer operand to it.
  Finally cleanup and remove the old nested geps.
@llvmbot
Copy link
Member

llvmbot commented Jul 22, 2025

@llvm/pr-subscribers-backend-directx

Author: Farzon Lotfi (farzonl)

Changes
  • Fixes #150050
  • Address the issue of many nested geps
  • Check for ConstantExpr GEP if we see it check if it needs a global replacement with a new type. Create the new constExpr Gep and set the pointer operand to it. Finally cleanup and remove the old nested geps.

Full diff: https://github.com/llvm/llvm-project/pull/150082.diff

2 Files Affected:

  • (modified) llvm/lib/Target/DirectX/DXILDataScalarization.cpp (+73)
  • (added) llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll (+80)
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index d9d9b36d0b739..5ec55a6718b9a 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -300,11 +300,84 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
   return replaceDynamicExtractElementInst(EEI);
 }
 
+static void buildConstExprGEPChain(GetElementPtrInst &GEPI, Value *CurrentPtr,
+                                   SmallVector<ConstantExpr *, 4> &GEPChain,
+                                   IRBuilder<> &Builder) {
+  // Process the rest of the chain in reverse order (skipping the innermost)
+  for (int I = GEPChain.size() - 2; I >= 0; I--) {
+    ConstantExpr *CE = GEPChain[I];
+    GetElementPtrInst *GEPInst =
+        cast<GetElementPtrInst>(CE->getAsInstruction());
+    GEPInst->insertBefore(GEPI.getIterator());
+    SmallVector<Value *, MaxVecSize> CurrIndices(GEPInst->indices());
+
+    // Create a new GEP instruction
+    Type *SourceTy = GEPInst->getSourceElementType();
+    CurrentPtr =
+        Builder.CreateGEP(SourceTy, CurrentPtr, CurrIndices, GEPInst->getName(),
+                          GEPInst->getNoWrapFlags());
+
+    // If this is the outermost GEP, update the main GEPI
+    if (I == 0) {
+      GEPI.setOperand(GEPI.getPointerOperandIndex(), CurrentPtr);
+    }
+
+    // Clean up the temporary instruction
+    GEPInst->eraseFromParent();
+  }
+}
+
 bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
   Value *PtrOperand = GEPI.getPointerOperand();
   Type *OrigGEPType = GEPI.getSourceElementType();
   Type *NewGEPType = OrigGEPType;
   bool NeedsTransform = false;
+  // Check if the pointer operand is a ConstantExpr GEP
+  if (auto *PtrOpGEPCE = dyn_cast<ConstantExpr>(PtrOperand);
+      PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) {
+
+    // Collect all nested GEPs in the chain
+    SmallVector<ConstantExpr *, 4> GEPChain;
+    Value *BasePointer = PtrOpGEPCE->getOperand(0);
+    GEPChain.push_back(PtrOpGEPCE);
+
+    // Walk up the chain to find all nested GEPs and the base pointer
+    while (auto *NextGEP = dyn_cast<ConstantExpr>(BasePointer)) {
+      if (NextGEP->getOpcode() != Instruction::GetElementPtr)
+        break;
+
+      GEPChain.push_back(NextGEP);
+      BasePointer = NextGEP->getOperand(0);
+    }
+
+    // Check if the base pointer is a global that needs replacement
+    if (GlobalVariable *NewGlobal = lookupReplacementGlobal(BasePointer)) {
+      IRBuilder<> Builder(&GEPI);
+
+      // Create a new GEP for the innermost GEP (last in the chain)
+      ConstantExpr *InnerGEPCE = GEPChain.back();
+      GetElementPtrInst *InnerGEP =
+          cast<GetElementPtrInst>(InnerGEPCE->getAsInstruction());
+      InnerGEP->insertBefore(GEPI.getIterator());
+
+      SmallVector<Value *, MaxVecSize> Indices(InnerGEP->indices());
+      Type *NewGEPType = NewGlobal->getValueType();
+      Value *NewInnerGEP =
+          Builder.CreateGEP(NewGEPType, NewGlobal, Indices, InnerGEP->getName(),
+                            InnerGEP->getNoWrapFlags());
+
+      // If there's only one GEP in the chain, update the main GEPI directly
+      if (GEPChain.size() == 1)
+        GEPI.setOperand(GEPI.getPointerOperandIndex(), NewInnerGEP);
+      else
+        // For multiple GEPs, we need to create a chain of GEPs
+        buildConstExprGEPChain(GEPI, NewInnerGEP, GEPChain, Builder);
+
+      // Clean up the innermost GEP
+      InnerGEP->eraseFromParent();
+      return true;
+    }
+  }
 
   if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
     NewGEPType = NewGlobal->getValueType();
diff --git a/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll
new file mode 100644
index 0000000000000..156a8e7c5c386
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll
@@ -0,0 +1,80 @@
+; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.4-library %s | FileCheck %s --check-prefixes=SCHECK,CHECK
+; RUN: opt -S -passes='dxil-data-scalarization,function(scalarizer<load-store>),dxil-flatten-arrays' -mtriple=dxil-pc-shadermodel6.4-library %s | FileCheck %s --check-prefixes=FCHECK,CHECK
+
+@aTile = hidden addrspace(3) global [10 x [10 x <4 x i32>]] zeroinitializer, align 16
+@bTile = hidden addrspace(3) global [10 x [10 x i32]] zeroinitializer, align 16
+@cTile = internal global [2 x [2 x <2 x i32>]] zeroinitializer, align 16
+@dTile = internal global [2 x [2 x [2 x <2 x i32>]]] zeroinitializer, align 16
+
+define void @CSMain() {
+; CHECK-LABEL: define void @CSMain() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE:%.*]] = alloca [4 x i32], align 16
+;
+; SCHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1), i32 0, i32 2
+; SCHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(3) [[TMP0]], align 16
+; SCHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
+;
+; FCHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE_I14:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 1
+; FCHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE_I25:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 2
+; FCHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE_I36:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 3
+; FCHECK-NEXT:    [[DOTI07:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 48), align 16
+; FCHECK-NEXT:    [[DOTI119:%.*]] = load i32, ptr addrspace(3) getelementptr ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 49), align 4
+; FCHECK-NEXT:    [[DOTI2211:%.*]] = load i32, ptr addrspace(3) getelementptr ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 50), align 8
+; FCHECK-NEXT:    [[DOTI3313:%.*]] = load i32, ptr addrspace(3) getelementptr ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 51), align 4
+; FCHECK-NEXT:    store i32 [[DOTI07]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
+; FCHECK-NEXT:    store i32 [[DOTI119]], ptr [[AFRAGPACKED_I_SCALARIZE_I14]], align 4
+; FCHECK-NEXT:    store i32 [[DOTI2211]], ptr [[AFRAGPACKED_I_SCALARIZE_I25]], align 8
+; FCHECK-NEXT:    store i32 [[DOTI3313]], ptr [[AFRAGPACKED_I_SCALARIZE_I36]], align 4
+;
+; CHECK-NEXT:    ret void
+entry:
+  %aFragPacked.i = alloca <4 x i32>, align 16
+  %0 = load <4 x i32>, ptr addrspace(3) getelementptr inbounds ([10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x <4 x i32>]], ptr addrspace(3) @aTile, i32 0, i32 1), i32 0, i32 2), align 16
+  store <4 x i32> %0, ptr %aFragPacked.i, align 16
+  ret void
+}
+
+define void @Main() {
+; CHECK-LABEL: define void @Main() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[BFRAGPACKED_I:%.*]] = alloca i32, align 16
+;
+; SCHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1
+; SCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(3) [[TMP0]], align 16
+; SCHECK-NEXT:    store i32 [[TMP1]], ptr [[BFRAGPACKED_I]], align 16
+;
+; FCHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16
+; FCHECK-NEXT:    store i32 [[TMP0]], ptr [[BFRAGPACKED_I]], align 16
+;
+; CHECK-NEXT:    ret void
+entry:
+  %bFragPacked.i = alloca i32, align 16
+  %0 = load i32, ptr addrspace(3) getelementptr inbounds ([10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1), align 16
+  store i32 %0, ptr %bFragPacked.i, align 16
+  ret void
+}
+
+define void @global_nested_geps_3d() {
+; CHECK-LABEL: define void @global_nested_geps_3d() {
+; SCHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1
+; SCHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+;
+; FCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4
+;
+; CHECK-NEXT:    ret void
+  %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* @cTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4
+  ret void
+}
+
+define void @global_nested_geps_4d() {
+; CHECK-LABEL: define void @global_nested_geps_4d() {
+; SCHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x <2 x i32>]], ptr getelementptr inbounds ([2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1
+; SCHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+;
+; FCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4
+;
+; CHECK-NEXT:    ret void
+  %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* getelementptr inbounds ([2 x [2 x [2 x <2 x i32>]]], [2 x [2 x [2 x <2 x i32>]]]* @dTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4
+  ret void
+}

@farzonl
Copy link
Member Author

farzonl commented Jul 22, 2025

Build failure in:

  Clang :: CodeGenCXX/microsoft-abi-eh-disabled.cpp
  Clang :: CodeGenCXX/microsoft-abi-eh-ip2state.cpp

Is unrelated to this change.

Copy link
Contributor

@bob80905 bob80905 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious question, is it possible for the innermost GEP to be a constexpr, but the outer GEPs not to be?
Is that a worthy test case?

@Icohedron
Copy link
Contributor

Curious question, is it possible for the innermost GEP to be a constexpr, but the outer GEPs not to be? Is that a worthy test case?

Not sure what you mean exactly, but a ConstantExpr GEP's pointer operand can only be a Global Variable or another ConstantExpr GEP. A (virtual) register is not allowed in a ConstantExpr.

@bob80905
Copy link
Contributor

Curious question, is it possible for the innermost GEP to be a constexpr, but the outer GEPs not to be? Is that a worthy test case?

Not sure what you mean exactly, but a ConstantExpr GEP's pointer operand can only be a Global Variable or another ConstantExpr GEP. A (virtual) register is not allowed in a ConstantExpr.

Specifically, I mean for this code block:

 while (auto *NextGEP = dyn_cast<ConstantExpr>(BasePointer)) {
      if (NextGEP->getOpcode() != Instruction::GetElementPtr)

Is it possible for a situation to happen so that the dyn_cast fails, but the opcode is indeed a GetElementPtr.
If that's possible then the GEP chain isn't fully complete. And when you say a ConstantExpr GEP, that makes me think there might exist non-ConstantExpr GEPs.

@Icohedron
Copy link
Contributor

Icohedron commented Jul 22, 2025

Curious question, is it possible for the innermost GEP to be a constexpr, but the outer GEPs not to be? Is that a worthy test case?

Not sure what you mean exactly, but a ConstantExpr GEP's pointer operand can only be a Global Variable or another ConstantExpr GEP. A (virtual) register is not allowed in a ConstantExpr.

Specifically, I mean for this code block:

 while (auto *NextGEP = dyn_cast<ConstantExpr>(BasePointer)) {
      if (NextGEP->getOpcode() != Instruction::GetElementPtr)

Is it possible for a situation to happen so that the dyn_cast fails, but the opcode is indeed a GetElementPtr. If that's possible then the GEP chain isn't fully complete. And when you say a ConstantExpr GEP, that makes me think there might exist non-ConstantExpr GEPs.

It is possible for that dyn_cast to fail, but if it does fail then that GEP must be an instruction
Then, because of make_early_inc_range visiting of the instructions, that instruction would have been visited before the current GEP instruction.

@Icohedron
Copy link
Contributor

Icohedron commented Jul 22, 2025

Perhaps you will prefer this implementation? I find it simpler and it avoids creating a bunch of temporary instructions.

bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  GEPOperator *GOp = cast<GEPOperator>(&GEPI);
  Value *PtrOperand = GOp->getPointerOperand();
  Type *NewGEPType = GOp->getSourceElementType();
  bool NeedsTransform = false;

  // Walk the chain of GEP ConstantExprs to obtain the root GEP
  while (auto *PtrOpGEPCE = dyn_cast<ConstantExpr>(PtrOperand)) {
    if (auto *PtrOpGOp = dyn_cast<GEPOperator>(PtrOpGEPCE)) {
      GOp = PtrOpGOp;
      PtrOperand = GOp->getPointerOperand();
      NewGEPType = GOp->getSourceElementType();
    } else
      break;
  }

  if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
    PtrOperand = NewGlobal;
    NewGEPType = NewGlobal->getValueType();
    NeedsTransform = true;
  } else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) {
    Type *AllocatedType = Alloca->getAllocatedType();
    // Only transform if the allocated type is an array
    if (AllocatedType != GOp->getResultElementType() && isa<ArrayType>(AllocatedType)) {
      NewGEPType = AllocatedType;
      NeedsTransform = true;
    }
  }

  // Scalar geps should remain scalars geps. The dxil-flatten-arrays pass will
  // convert these scalar geps into flattened array geps
  if (!isa<ArrayType>(GOp->getSourceElementType()))
    NewGEPType = GOp->getSourceElementType();

  // Note: We bail if this isn't a gep touched via alloca or global
  // transformations
  if (!NeedsTransform)
    return false;

  IRBuilder<> Builder(&GEPI);
  SmallVector<Value *, 4> Indices(GOp->indices());
  Value *NewGEP =
      Builder.CreateGEP(NewGEPType, PtrOperand, Indices,
                        GOp->getName(), GOp->getNoWrapFlags());

  GOp->replaceAllUsesWith(NewGEP);
  if (ConstantExpr *GOpCE = dyn_cast<ConstantExpr>(GOp))
    GOpCE->destroyConstant();
  else if (GetElementPtrInst *GOpGEPI = dyn_cast<GetElementPtrInst>(GOp))
    GOpGEPI->eraseFromParent(); // In this case, GOp must be GEPI

  return true;
}

It passes all the directx codegen tests, including the new test you added in this PR.

@farzonl farzonl force-pushed the bugfix/issue-150050 branch from e6acd09 to 2c0e73c Compare July 23, 2025 15:52
@farzonl farzonl merged commit c73c19c into llvm:main Jul 23, 2025
10 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[DirectX] DXILDataScalarization.cpp is leaving LLVM IR in a broken state where globals are deleted when they still have const uses.
4 participants