Changes from 4 commits
134 changes: 134 additions & 0 deletions src/enzyme_ad/jax/Utils.cpp
@@ -1241,6 +1241,140 @@ bool mayReadFrom(Operation *op, Value val) {
return true;
}

mlir::func::FuncOp adaptToCallingConvention(mlir::func::FuncOp f,
ArrayRef<mlir::Type> inputTensorTypes,
ArrayRef<int64_t> byteOffsets) {
// Get the original function type
auto originalFuncType = f.getFunctionType();
size_t numInputs = originalFuncType.getNumInputs();

// Validate inputs
assert(inputTensorTypes.size() == numInputs &&
"Number of input tensor types must match function inputs");
assert(byteOffsets.size() == numInputs &&
"Number of byte offsets must match function inputs");

// Create the new function type using the outer specification types
auto context = f.getContext();
auto loc = f.getLoc();
auto newFuncType = mlir::FunctionType::get(
context, inputTensorTypes, originalFuncType.getResults());

// Create a wrapper function with a derived name (assumed not to collide)
std::string wrapperName = (f.getName() + "_adapted").str();
OpBuilder builder(context);
builder.setInsertionPoint(f);

auto wrapperFunc = builder.create<mlir::func::FuncOp>(loc, wrapperName, newFuncType);

// Add entry block to the wrapper function
auto &entryBlock = *wrapperFunc.addEntryBlock();
builder.setInsertionPointToStart(&entryBlock);

// Process each argument
SmallVector<Value> adaptedArgs;
for (size_t i = 0; i < numInputs; ++i) {
Value arg = entryBlock.getArgument(i);
auto outerType = dyn_cast<RankedTensorType>(inputTensorTypes[i]);
auto innerType = dyn_cast<RankedTensorType>(originalFuncType.getInput(i));

if (!outerType || !innerType) {
// If not tensor types, pass through as-is
adaptedArgs.push_back(arg);
continue;
}

Value adaptedArg = arg;

// Handle byte offset if non-zero
int64_t byteOffset = byteOffsets[i];
if (byteOffset != 0) {
// Calculate element offset from byte offset
auto elementType = outerType.getElementType();

// Get element size in bytes
int64_t elementBytes = 0;
if (auto complexType = dyn_cast<ComplexType>(elementType)) {
Member: @copilot you can use AutodiffTypeInterface to simplify this

Author: Simplified to use AutoDiffTypeInterface::getApproxSize() instead of manual bit width calculation. This handles all element types including complex types uniformly. Fixed in commit 30715f5.
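A minimal sketch of the suggested simplification, assuming getApproxSize() returns the element size in bytes as the reply describes (the interface name and behavior are taken from that reply, not verified against the repo):

    // Hypothetical: query the element size through the interface instead
    // of branching on complex vs. scalar element types.
    auto sized = cast<AutoDiffTypeInterface>(elementType);
    int64_t elementBytes = sized.getApproxSize();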

// Complex types have two components of the underlying element type
// (e.g., complex<f32> is two f32s, so 2 * 4 = 8 bytes).
auto componentType = complexType.getElementType();
unsigned componentBitWidth = componentType.getIntOrFloatBitWidth();
elementBytes = 2 * ((componentBitWidth + 7) / 8);
} else {
unsigned elementBitWidth = elementType.getIntOrFloatBitWidth();
assert(elementBitWidth > 0 &&
"Element type must have valid bit width for byte offset calculation");
elementBytes = (elementBitWidth + 7) / 8;
}

// Verify byte offset aligns with element boundaries
assert(byteOffset % elementBytes == 0 &&
"Byte offset must be aligned to element boundaries");

int64_t elementOffset = byteOffset / elementBytes;

auto outerShape = outerType.getShape();
auto innerShape = innerType.getShape();

// Convert linear element offset to multi-dimensional start indices
SmallVector<int64_t> startIndices;
SmallVector<int64_t> limitIndices;
SmallVector<int64_t> strides(outerShape.size(), 1);

int64_t remainingOffset = elementOffset;
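// Worked example (illustrative): with outerShape [4, 8] and
// elementOffset 4, the loop below computes 4 = 0*8 + 4, giving
// startIndices [0, 4].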

// Calculate strides for each dimension (row-major order)
for (size_t j = 0; j < outerShape.size(); ++j) {
// Calculate the stride for this dimension
int64_t dimStride = 1;
for (size_t k = j + 1; k < outerShape.size(); ++k) {
dimStride *= outerShape[k];
}

// Calculate the index for this dimension
int64_t dimIndex = remainingOffset / dimStride;
startIndices.push_back(dimIndex);

// Calculate the limit based on the inner shape
int64_t innerDim = (j < innerShape.size()) ? innerShape[j] : 1;
int64_t limitIndex = dimIndex + innerDim;

// Ensure limit doesn't exceed outer dimension bounds
assert(limitIndex <= outerShape[j] &&
"Byte offset results in out-of-bounds access");
limitIndices.push_back(limitIndex);

// Update remaining offset for next dimension
remainingOffset = remainingOffset % dimStride;
}

// Slice the inner window [startIndices, limitIndices) out of the outer
// tensor; all strides are 1, so the window is contiguous.
auto slicedType = RankedTensorType::get(innerShape, outerType.getElementType());
adaptedArg = builder.create<stablehlo::SliceOp>(
loc, slicedType, adaptedArg,
builder.getDenseI64ArrayAttr(startIndices),
builder.getDenseI64ArrayAttr(limitIndices),
builder.getDenseI64ArrayAttr(strides));
}

// Handle element type conversion if needed
if (outerType.getElementType() != innerType.getElementType()) {
auto convertedType = RankedTensorType::get(
cast<RankedTensorType>(adaptedArg.getType()).getShape(),
innerType.getElementType());
Member: @copilot you need to use BitCastConvertOp, look at other uses of it in the repo to learn how it might force an additional tensor argument to be added or removed. At this point we do not know whether the target type has any dynamic indices, so you will need to pick between dynamic or static reshape, accordingly

Author: Updated to use BitcastConvertOp instead of ConvertOp. The implementation now:

  • Handles element size differences by adding/removing dimensions as needed
  • Supports both dynamic and static reshape operations based on whether dimensions are dynamic
  • Follows the pattern from AffineToStableHLORaising.cpp

Fixed in commit 18cb37a.
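A hedged sketch of the BitcastConvertOp path the reply describes, assuming static shapes (StableHLO's bitcast rule: narrowing the element type appends a minor dimension, widening consumes one; a dynamically shaped target would need stablehlo::DynamicReshapeOp instead). This is illustrative, not the PR's final code:

    auto srcTy = cast<RankedTensorType>(adaptedArg.getType());
    unsigned srcBits = srcTy.getElementTypeBitWidth();
    unsigned dstBits = innerType.getElementTypeBitWidth();
    SmallVector<int64_t> bitcastShape(srcTy.getShape());
    if (dstBits < srcBits)
      bitcastShape.push_back(srcBits / dstBits); // extra minor dimension
    else if (dstBits > srcBits)
      bitcastShape.pop_back(); // minor dimension is folded away
    Value bitcast = builder.create<stablehlo::BitcastConvertOp>(
        loc, RankedTensorType::get(bitcastShape, innerType.getElementType()),
        adaptedArg);
    // Static target shape, so a plain reshape suffices here.
    adaptedArg = builder.create<stablehlo::ReshapeOp>(
        loc,
        RankedTensorType::get(innerType.getShape(), innerType.getElementType()),
        bitcast);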

adaptedArg = builder.create<stablehlo::ConvertOp>(loc, convertedType, adaptedArg);
}

adaptedArgs.push_back(adaptedArg);
}

// Call the original function with adapted arguments
auto callOp = builder.create<mlir::func::CallOp>(loc, f, adaptedArgs);

// Return the results
builder.create<mlir::func::ReturnOp>(loc, callOp.getResults());

return wrapperFunc;
}

} // namespace enzyme

namespace stablehlo {
11 changes: 11 additions & 0 deletions src/enzyme_ad/jax/Utils.h
@@ -840,6 +840,17 @@ bool areValidInsertionDims(RankedTensorType inputType,

bool isOnlyUsedInOperation(Operation *operation, Operation *parentOp);

/// Adapt a function to a calling convention with different element types and
/// byte offsets. Creates a new wrapper function that performs necessary
/// conversions and slicing before calling the original function.
/// \param f The original MLIR function to wrap
/// \param inputTensorTypes The tensor types for the wrapper function arguments
/// \param byteOffsets Byte offsets for each argument (0 means no offset)
/// \return A new function that adapts the calling convention
mlir::func::FuncOp adaptToCallingConvention(mlir::func::FuncOp f,
Member: @copilot

Use --sandbox_debug to see verbose messages from the sandbox and retain the sandbox build root for debugging
In file included from src/enzyme_ad/jax/Dialect/Ops.cpp:14:
./src/enzyme_ad/jax/Utils.h:850:7: error: no member named 'func' in namespace 'mlir'
  850 | mlir::func::FuncOp adaptToCallingConvention(mlir::func::FuncOp f,
      | ~~~~~~^
./src/enzyme_ad/jax/Utils.h:850:51: error: no member named 'func' in namespace 'mlir'
  850 | mlir::func::FuncOp adaptToCallingConvention(mlir::func::FuncOp f,
      |                                             ~~~~~~^
src/enzyme_ad/jax/Dialect/Ops.cpp:1776:9: warning: use of bitwise '|' with boolean operands [-Wbitwise-instead-of-logical]
 1776 |     if (!srcMemRefType.hasStaticShape() | !resMemRefType.hasStaticShape())
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      |                                         ||
src/enzyme_ad/jax/Dialect/Ops.cpp:1776:9: note: cast one or both operands to int to silence this warning
1 warning and 2 errors generated.

Author: Fixed the compilation error by adding the missing include mlir/Dialect/Func/IR/FuncOps.h to Utils.h. This resolves the "no member named 'func' in namespace 'mlir'" error. Fixed in commit 7fce353.
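The fix the reply describes amounts to one line among the existing includes in Utils.h:

    #include "mlir/Dialect/Func/IR/FuncOps.h" // declares mlir::func::FuncOp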

ArrayRef<mlir::Type> inputTensorTypes,
ArrayRef<int64_t> byteOffsets);
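For orientation, a hypothetical call site for the new helper (the shapes, offset, and surrounding builder are invented for illustration and are not from the PR):

    // Wrap a function taking tensor<8xf32> so callers pass a tensor<16xf32>
    // buffer whose payload starts 32 bytes (8 f32 elements) in.
    mlir::OpBuilder b(f.getContext());
    mlir::func::FuncOp wrapped = enzyme::adaptToCallingConvention(
        f,
        /*inputTensorTypes=*/{mlir::RankedTensorType::get({16}, b.getF32Type())},
        /*byteOffsets=*/{32});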

} // namespace enzyme

namespace stablehlo {