Integrate LLVM to llvm/llvm-project@3e61c1ab7f #20015

Merged 2 commits on Feb 19, 2025

compiler/plugins/input/TOSA/InputConversion/Passes.cpp (5 additions & 5 deletions)
@@ -36,16 +36,16 @@ void buildTOSAInputConversionPassPipeline(OpPassManager &passManager) {
// Currently we don't handle SCF ops well and have to convert them all to CFG.
// In the future it would be nice if we could have all of flow be both scf
// and cfg compatible.
- passManager.addNestedPass<func::FuncOp>(tosa::createTosaToSCF());
+ passManager.addNestedPass<func::FuncOp>(createTosaToSCFPass());

// We also don't handle calls well on the old codepath; until we remove the
// use of the CFG we can continue inlining.
passManager.addPass(mlir::createInlinerPass());

passManager.addNestedPass<func::FuncOp>(
tosa::createTosaMakeBroadcastablePass());
- passManager.addNestedPass<func::FuncOp>(tosa::createTosaToArith());
- passManager.addNestedPass<func::FuncOp>(tosa::createTosaToTensor());
+ passManager.addNestedPass<func::FuncOp>(createTosaToArithPass());
+ passManager.addNestedPass<func::FuncOp>(createTosaToTensorPass());
passManager.addNestedPass<func::FuncOp>(
iree_compiler::createTosaToLinalgExtPass());
passManager.addNestedPass<func::FuncOp>(mlir::createCanonicalizerPass());
@@ -60,8 +60,8 @@ void buildTOSAInputConversionPassPipeline(OpPassManager &passManager) {
iree_compiler::createConverti48Toi64Pass());

// Sometimes we generate more TOSA operations during the lowering to linalg.
- passManager.addNestedPass<func::FuncOp>(tosa::createTosaToArith());
- passManager.addNestedPass<func::FuncOp>(tosa::createTosaToTensor());
+ passManager.addNestedPass<func::FuncOp>(createTosaToArithPass());
+ passManager.addNestedPass<func::FuncOp>(createTosaToTensorPass());

passManager.addNestedPass<func::FuncOp>(
iree_compiler::createStripSignednessPass());
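
The churn in this file is mechanical: the upstream integrate replaces the hand-written TOSA conversion constructors in the tosa namespace with tablegen-generated ones in the top-level mlir namespace (tosa::createTosaToSCF() becomes createTosaToSCFPass(), and likewise for the arith and tensor conversions). A minimal sketch of a pipeline built against the post-integrate API; the include paths and the pipeline itself are illustrative assumptions, not code from this PR:

#include "mlir/Conversion/TosaToArith/TosaToArith.h"
#include "mlir/Conversion/TosaToSCF/TosaToSCF.h"
#include "mlir/Conversion/TosaToTensor/TosaToTensor.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/PassManager.h"

using namespace mlir;

// Example pipeline using the generated create*Pass() constructors that this
// integrate switches to; pass options take their defaults when none are given.
static void buildExampleTosaPipeline(OpPassManager &pm) {
  pm.addNestedPass<func::FuncOp>(createTosaToSCFPass());
  pm.addNestedPass<func::FuncOp>(createTosaToArithPass());
  pm.addNestedPass<func::FuncOp>(createTosaToTensorPass());
}
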
compiler/plugins/input/TOSA/PluginRegistration.cpp (2 additions & 2 deletions)
@@ -28,9 +28,9 @@ struct TOSASession
PluginActivationPolicy::DefaultActivated> {
static void registerPasses() {
registerTOSAConversionPasses();
- registerTosaToArith();
+ registerTosaToArithPass();
registerTosaToLinalg();
- registerTosaToTensor();
+ registerTosaToTensorPass();
}

void onRegisterDialects(DialectRegistry &registry) override {
@@ -19,5 +19,5 @@ module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
}
}
// CHECK-LABEL: util.func public @lhs_encoding
- // CHECK: tensor.pack
- // CHECK: tensor.unpack
+ // CHECK: linalg.pack
+ // CHECK: linalg.unpack
Comment on lines -22 to +23
@ScottTodd (Member) commented on Feb 19, 2025:

Can you please call out large changes like these in the PR description with some context? A link to the upstream commit (llvm/llvm-project@517800e, https://github.com/llvm/llvm-project/commit/517800e37e8d3a4ee84214bef65e227612c2a98b) would be ideal.

We don't have a PR template for LLVM integrates but here's what we usually look for:

The PR author (Contributor) replied:

Ahh, I didn't know you can save a click by putting llvm/llvm-project@3e61c1ab7f in the title. Thanks for the tip.

I also updated the PR title and the description.
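
The rest of the diff tracks llvm/llvm-project@517800e, which moved PackOp and UnPackOp from the tensor dialect into the linalg dialect. For downstream code the migration is a namespace swap; the builder takes the same operands and attributes as before. A minimal sketch of the post-move spelling (the helper and its parameters are placeholders for illustration, not code from this PR):

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/IR/PatternMatch.h"

using namespace mlir;

// Hypothetical helper: builds a pack op under its new home in linalg.
// Pre-integrate, the only difference would be tensor::PackOp here.
static Value buildPack(PatternRewriter &rewriter, Location loc, Value source,
                       Value dest, ArrayRef<int64_t> innerDimsPos,
                       ArrayRef<OpFoldResult> mixedTiles, Value paddingValue,
                       ArrayRef<int64_t> outerDimsPerm) {
  return rewriter
      .create<linalg::PackOp>(loc, source, dest, innerDimsPos, mixedTiles,
                              paddingValue, outerDimsPerm)
      .getResult();
}
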

@@ -263,7 +263,7 @@ matchDAGForUKernel(RewriterBase &rewriter, linalg::Mmt4DOp op,
}

static FailureOr<IREE::Codegen::UKernelOpInterface>
- matchDAGForUKernel(RewriterBase &rewriter, tensor::PackOp op,
+ matchDAGForUKernel(RewriterBase &rewriter, linalg::PackOp op,
bool /*skipIntermediateRoundings*/) {
auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(op);
const char ukernelName[] = "pack";
@@ -386,7 +386,7 @@ matchDAGForUKernel(RewriterBase &rewriter, tensor::PackOp op,
}

static FailureOr<IREE::Codegen::UKernelOpInterface>
- matchDAGForUKernel(RewriterBase &rewriter, tensor::UnPackOp op,
+ matchDAGForUKernel(RewriterBase &rewriter, linalg::UnPackOp op,
bool /*skipIntermediateRoundings*/) {
auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(op);
const char ukernelName[] = "unpack";
@@ -616,8 +616,8 @@ void CPULowerToUKernelsPass::runOnOperation() {
// these ops.
auto allTargets = [](auto target) { return true; };
patterns.insert<LowerToUKernelPattern<linalg::Mmt4DOp>,
- LowerToUKernelPattern<tensor::PackOp>,
- LowerToUKernelPattern<tensor::UnPackOp>>(
+ LowerToUKernelPattern<linalg::PackOp>,
+ LowerToUKernelPattern<linalg::UnPackOp>>(
context, allTargets, skipIntermediateRoundings);
// These patterns are inherently specific to the VMVX backend.
patterns.insert<LowerToUKernelPattern<IREE::Codegen::QueryTileSizesOp>>(
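
Note how little this file changes: the ukernel lowering picks a matcher by C++ overload on the op type, so retargeting pack/unpack from tensor to linalg only touches the overload signatures and the template arguments in the pattern list; the DAG matching itself is untouched. A condensed sketch of that dispatch shape, with the matcher bodies elided and the target-filtering predicate from the real pass omitted (an assumption-laden sketch, not IREE's exact pattern; includes and the IREE type declarations are taken as given):

template <typename OpType>
struct LowerToUKernelPattern : OpRewritePattern<OpType> {
  LowerToUKernelPattern(MLIRContext *ctx, bool skipIntermediateRoundings)
      : OpRewritePattern<OpType>(ctx),
        skipIntermediateRoundings(skipIntermediateRoundings) {}

  LogicalResult matchAndRewrite(OpType op,
                                PatternRewriter &rewriter) const override {
    // Overload resolution statically selects the matcher for OpType, e.g.
    // the linalg::PackOp overload above, which emits the "pack" ukernel.
    FailureOr<IREE::Codegen::UKernelOpInterface> ukernelOp =
        matchDAGForUKernel(rewriter, op, skipIntermediateRoundings);
    if (failed(ukernelOp))
      return rewriter.notifyMatchFailure(op, "no matching ukernel");
    rewriter.replaceOp(op, ukernelOp->getOperation()->getResults());
    return success();
  }

  bool skipIntermediateRoundings;
};
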
@@ -46,7 +46,7 @@ static void tileBatchDimsForBatchMmt4dOp(RewriterBase &rewriter,

static void tileNonPackedDimsFor3DPackOps(RewriterBase &rewriter,
FunctionOpInterface funcOp) {
- funcOp.walk([&](tensor::PackOp packOp) {
+ funcOp.walk([&](linalg::PackOp packOp) {
if (packOp.getSourceRank() != 3 || packOp.getDestRank() != 5) {
return;
}
@@ -81,7 +81,7 @@ static void tileNonPackedDimsFor3DPackOps(RewriterBase &rewriter,

static void tileNonPackedDimsFor5DPUnpackOps(RewriterBase &rewriter,
FunctionOpInterface funcOp) {
- funcOp.walk([&](tensor::UnPackOp unpackOp) {
+ funcOp.walk([&](linalg::UnPackOp unpackOp) {
if (unpackOp.getSourceRank() != 5 || unpackOp.getDestRank() != 3) {
return;
}
@@ -251,10 +251,10 @@ struct ConvertBatchMmt4DtoMmt4DPattern
}
};

- struct Convert3DPackto2DPackPattern : public OpRewritePattern<tensor::PackOp> {
- using OpRewritePattern<tensor::PackOp>::OpRewritePattern;
+ struct Convert3DPackto2DPackPattern : public OpRewritePattern<linalg::PackOp> {
+ using OpRewritePattern<linalg::PackOp>::OpRewritePattern;

- LogicalResult matchAndRewrite(tensor::PackOp packOp,
+ LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override {
if (packOp.getSourceRank() != 3 || packOp.getDestRank() != 5) {
return failure();
@@ -309,7 +309,7 @@ struct Convert3DPackto2DPackPattern : public OpRewritePattern<tensor::PackOp> {
auto reducedDest = tensor::createCanonicalRankReducingExtractSliceOp(
rewriter, loc, packOp.getDest(), reducedDestType);

- auto newPackOp = rewriter.create<tensor::PackOp>(
+ auto newPackOp = rewriter.create<linalg::PackOp>(
loc, reducedSrc, reducedDest, newInnerDimsPos, packOp.getMixedTiles(),
packOp.getPaddingValue(), newOuterDimsPerm);

@@ -321,10 +321,10 @@ struct Convert3DPackto2DPackPattern : public OpRewritePattern<tensor::PackOp> {
};

struct Convert5DUnPackto4DUnPackPattern
- : public OpRewritePattern<tensor::UnPackOp> {
- using OpRewritePattern<tensor::UnPackOp>::OpRewritePattern;
+ : public OpRewritePattern<linalg::UnPackOp> {
+ using OpRewritePattern<linalg::UnPackOp>::OpRewritePattern;

- LogicalResult matchAndRewrite(tensor::UnPackOp unpackOp,
+ LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp,
PatternRewriter &rewriter) const override {
if (unpackOp.getSourceRank() != 5 || unpackOp.getDestRank() != 3) {
return failure();
@@ -387,7 +387,7 @@ struct Convert5DUnPackto4DUnPackPattern
auto reducedDest = tensor::createCanonicalRankReducingExtractSliceOp(
rewriter, loc, unpackOp.getDest(), reducedDestType);

- auto newUnpackOp = rewriter.create<tensor::UnPackOp>(
+ auto newUnpackOp = rewriter.create<linalg::UnPackOp>(
loc, reducedSrc, reducedDest, newInnerDimsPos, unpackOp.getMixedTiles(),
newOuterDimsPerm);

@@ -436,8 +436,8 @@ void CPUPrepareUkernelsPass::runOnOperation() {
tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx);
tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, ctx);
tensor::EmptyOp::getCanonicalizationPatterns(patterns, ctx);
- tensor::PackOp::getCanonicalizationPatterns(patterns, ctx);
- tensor::UnPackOp::getCanonicalizationPatterns(patterns, ctx);
+ linalg::PackOp::getCanonicalizationPatterns(patterns, ctx);
+ linalg::UnPackOp::getCanonicalizationPatterns(patterns, ctx);
tensor::CastOp::getCanonicalizationPatterns(patterns, ctx);
tensor::populateFoldTensorEmptyPatterns(patterns);
if (failed(applyPatternsGreedily(funcOp, std::move(patterns)))) {
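
Both rewrites in this file follow the same recipe: when a rank-3 pack (or rank-5 unpack) carries a unit leading dimension, take canonical rank-reducing slices of the source and destination, re-emit the op one rank lower with every dimension index shifted down, and insert the result back into the original destination. A hedged sketch of the pack case under those assumptions (the real pattern also checks the unit-dim and permutation preconditions, which are elided here):

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"

using namespace mlir;

static LogicalResult rewrite3DPackAs2D(linalg::PackOp packOp,
                                       PatternRewriter &rewriter) {
  if (packOp.getSourceRank() != 3 || packOp.getDestRank() != 5)
    return failure();
  Location loc = packOp.getLoc();

  // Drop the (assumed unit) leading dimension from both operand types.
  auto srcType = cast<RankedTensorType>(packOp.getSourceType());
  auto destType = cast<RankedTensorType>(packOp.getDestType());
  auto reducedSrcType = RankedTensorType::get(srcType.getShape().drop_front(),
                                              srcType.getElementType());
  auto reducedDestType = RankedTensorType::get(
      destType.getShape().drop_front(), destType.getElementType());
  Value reducedSrc = tensor::createCanonicalRankReducingExtractSliceOp(
      rewriter, loc, packOp.getSource(), reducedSrcType);
  Value reducedDest = tensor::createCanonicalRankReducingExtractSliceOp(
      rewriter, loc, packOp.getDest(), reducedDestType);

  // Dropping the leading dim shifts every remaining dim index down by one.
  SmallVector<int64_t> newInnerDimsPos, newOuterDimsPerm;
  for (int64_t pos : packOp.getInnerDimsPos())
    newInnerDimsPos.push_back(pos - 1);
  for (int64_t dim : packOp.getOuterDimsPerm())
    if (dim != 0)
      newOuterDimsPerm.push_back(dim - 1);

  auto newPackOp = rewriter.create<linalg::PackOp>(
      loc, reducedSrc, reducedDest, newInnerDimsPos, packOp.getMixedTiles(),
      packOp.getPaddingValue(), newOuterDimsPerm);

  // Expand the rank-reduced result back into the original 5-D destination.
  Value replacement = tensor::createCanonicalRankReducingInsertSliceOp(
      rewriter, loc, newPackOp.getResult(), packOp.getDest());
  rewriter.replaceOp(packOp, replacement);
  return success();
}
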
@@ -287,7 +287,7 @@ func.func @mmt4d_bf16bf16f32(%arg0 : tensor<?x?x16x2xbf16>, %arg1 : tensor<?x?x1
func.func @pack_i8i8_x86(%arg0 : tensor<?x?xi8>, %arg1 : tensor<?x?x7x8xi8>, %arg2 : i8) -> tensor<?x?x7x8xi8> attributes {
hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "all", target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}>
} {
- %result = tensor.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
+ %result = linalg.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
: tensor<?x?xi8> -> tensor<?x?x7x8xi8>
func.return %result : tensor<?x?x7x8xi8>
}
@@ -315,7 +315,7 @@ func.func @pack_i8i8_x86(%arg0 : tensor<?x?xi8>, %arg1 : tensor<?x?x7x8xi8>, %ar
func.func @pack_i8i8(%arg0 : tensor<?x?xi8>, %arg1 : tensor<?x?x7x8xi8>, %arg2 : i8) -> tensor<?x?x7x8xi8> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
+ %result = linalg.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
: tensor<?x?xi8> -> tensor<?x?x7x8xi8>
func.return %result : tensor<?x?x7x8xi8>
}
@@ -344,7 +344,7 @@ func.func @pack_i8i8(%arg0 : tensor<?x?xi8>, %arg1 : tensor<?x?x7x8xi8>, %arg2 :
func.func @pack_f16f16(%arg0 : tensor<?x?xf16>, %arg1 : tensor<?x?x7x8xf16>, %arg2 : f16) -> tensor<?x?x7x8xf16> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.pack %arg0 padding_value(%arg2 : f16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
+ %result = linalg.pack %arg0 padding_value(%arg2 : f16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
: tensor<?x?xf16> -> tensor<?x?x7x8xf16>
func.return %result : tensor<?x?x7x8xf16>
}
@@ -373,7 +373,7 @@ func.func @pack_f16f16(%arg0 : tensor<?x?xf16>, %arg1 : tensor<?x?x7x8xf16>, %ar
func.func @pack_bf16bf16(%arg0 : tensor<?x?xbf16>, %arg1 : tensor<?x?x7x8xbf16>, %arg2 : bf16) -> tensor<?x?x7x8xbf16> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.pack %arg0 padding_value(%arg2 : bf16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
+ %result = linalg.pack %arg0 padding_value(%arg2 : bf16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
: tensor<?x?xbf16> -> tensor<?x?x7x8xbf16>
func.return %result : tensor<?x?x7x8xbf16>
}
@@ -401,7 +401,7 @@ func.func @pack_bf16bf16(%arg0 : tensor<?x?xbf16>, %arg1 : tensor<?x?x7x8xbf16>,
func.func @pack_i32i32_transpose_inner(%arg0 : tensor<?x?xi32>, %arg1 : tensor<?x?x7x8xi32>, %arg2 : i32) -> tensor<?x?x7x8xi32> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.pack %arg0 padding_value(%arg2 : i32) inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
+ %result = linalg.pack %arg0 padding_value(%arg2 : i32) inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
: tensor<?x?xi32> -> tensor<?x?x7x8xi32>
func.return %result : tensor<?x?x7x8xi32>
}
@@ -430,19 +430,19 @@ func.func @pack_i32i32_transpose_inner(%arg0 : tensor<?x?xi32>, %arg1 : tensor<?
func.func @pack_f32f32_transpose_inner_and_outer(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?x7x8xf32>, %arg2 : f32) -> tensor<?x?x7x8xf32> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.pack %arg0 padding_value(%arg2 : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
+ %result = linalg.pack %arg0 padding_value(%arg2 : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
: tensor<?x?xf32> -> tensor<?x?x7x8xf32>
func.return %result : tensor<?x?x7x8xf32>
}

// -----

- // Check that tensor.pack is not lowered to a microkernel by default - it should
+ // Check that linalg.pack is not lowered to a microkernel by default - it should
// only be on VMVX.
// CHECK: func @unpack_f16f16_default
- // CHECK: tensor.unpack
+ // CHECK: linalg.unpack
func.func @unpack_f16f16_default(%arg0 : tensor<?x?x7x8xf16>, %arg1 : tensor<?x?xf16>) -> tensor<?x?xf16> {
- %result = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
+ %result = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
: tensor<?x?x7x8xf16> -> tensor<?x?xf16>
func.return %result : tensor<?x?xf16>
}
@@ -468,7 +468,7 @@ func.func @unpack_f16f16_default(%arg0 : tensor<?x?x7x8xf16>, %arg1 : tensor<?x?
func.func @unpack_f16f16(%arg0 : tensor<?x?x7x8xf16>, %arg1 : tensor<?x?xf16>) -> tensor<?x?xf16> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
+ %result = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
: tensor<?x?x7x8xf16> -> tensor<?x?xf16>
func.return %result : tensor<?x?xf16>
}
@@ -494,7 +494,7 @@ func.func @unpack_f16f16(%arg0 : tensor<?x?x7x8xf16>, %arg1 : tensor<?x?xf16>) -
func.func @unpack_i32i32_transpose_inner(%arg0 : tensor<?x?x7x8xi32>, %arg1 : tensor<?x?xi32>) -> tensor<?x?xi32> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
+ %result = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
: tensor<?x?x7x8xi32> -> tensor<?x?xi32>
func.return %result : tensor<?x?xi32>
}
@@ -520,7 +520,7 @@ func.func @unpack_i32i32_transpose_inner(%arg0 : tensor<?x?x7x8xi32>, %arg1 : te
func.func @unpack_f32f32_transpose_inner_and_outer(%arg0 : tensor<?x?x7x8xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
+ %result = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
: tensor<?x?x7x8xf32> -> tensor<?x?xf32>
func.return %result : tensor<?x?xf32>
}
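
These tests pin down the gating policy: pack/unpack ukernel lowering is off by default and opted into per target, with ukernels = "all" in the hal.executable.target configuration enabling everything (VMVX in these tests). A hedged sketch of what such a gate can look like in the matcher; isUkernelEnabled and the "all"-versus-allowlist policy are illustrative assumptions, while the ExecutableTargetAttr lookup is the one used in the pass above (IREE includes and namespaces assumed):

using namespace mlir;
using namespace mlir::iree_compiler;

// Hypothetical gate: inspects the executable target configuration for a
// "ukernels" entry, mirroring attributes like {ukernels = "all"} above.
static bool isUkernelEnabled(Operation *op, StringRef ukernelName) {
  auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(op);
  if (!targetAttr)
    return false;
  DictionaryAttr cfg = targetAttr.getConfiguration();
  auto ukernels = cfg ? cfg.getAs<StringAttr>("ukernels") : StringAttr();
  if (!ukernels)
    return false;
  if (ukernels.getValue() == "all")
    return true;
  // Otherwise treat the value as a comma-separated allowlist, e.g. "pack,mmt4d".
  SmallVector<StringRef> enabled;
  ukernels.getValue().split(enabled, ',');
  return llvm::is_contained(enabled, ukernelName);
}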