Integrate LLVM to llvm/llvm-project@3e61c1ab7f #20015

Merged 2 commits on Feb 19, 2025

compiler/plugins/input/TOSA/InputConversion/Passes.cpp (5 additions & 5 deletions)
@@ -36,16 +36,16 @@ void buildTOSAInputConversionPassPipeline(OpPassManager &passManager) {
// Currently we don't handle SCF ops well and have to convert them all to CFG.
// In the future it would be nice if we could have all of flow be both scf
// and cfg compatible.
- passManager.addNestedPass<func::FuncOp>(tosa::createTosaToSCF());
+ passManager.addNestedPass<func::FuncOp>(createTosaToSCFPass());

// We also don't handle calls well on the old codepath; until we remove the
// use of the CFG we can continue inlining.
passManager.addPass(mlir::createInlinerPass());

passManager.addNestedPass<func::FuncOp>(
tosa::createTosaMakeBroadcastablePass());
- passManager.addNestedPass<func::FuncOp>(tosa::createTosaToArith());
- passManager.addNestedPass<func::FuncOp>(tosa::createTosaToTensor());
+ passManager.addNestedPass<func::FuncOp>(createTosaToArithPass());
+ passManager.addNestedPass<func::FuncOp>(createTosaToTensorPass());
passManager.addNestedPass<func::FuncOp>(
iree_compiler::createTosaToLinalgExtPass());
passManager.addNestedPass<func::FuncOp>(mlir::createCanonicalizerPass());
@@ -60,8 +60,8 @@ void buildTOSAInputConversionPassPipeline(OpPassManager &passManager) {
iree_compiler::createConverti48Toi64Pass());

// Sometimes we generate more TOSA operations during the lowering to linalg.
- passManager.addNestedPass<func::FuncOp>(tosa::createTosaToArith());
- passManager.addNestedPass<func::FuncOp>(tosa::createTosaToTensor());
+ passManager.addNestedPass<func::FuncOp>(createTosaToArithPass());
+ passManager.addNestedPass<func::FuncOp>(createTosaToTensorPass());

passManager.addNestedPass<func::FuncOp>(
iree_compiler::createStripSignednessPass());
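
The churn in this file is mechanical: the upstream integrate replaces the hand-written TOSA conversion constructors in the tosa namespace with tablegen-generated ones in the top-level mlir namespace (tosa::createTosaToSCF() becomes createTosaToSCFPass(), and likewise for the arith and tensor conversions). A minimal sketch of a pipeline built against the post-integrate API; the include paths and the pipeline itself are illustrative assumptions, not code from this PR:

#include "mlir/Conversion/TosaToArith/TosaToArith.h"
#include "mlir/Conversion/TosaToSCF/TosaToSCF.h"
#include "mlir/Conversion/TosaToTensor/TosaToTensor.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/PassManager.h"

using namespace mlir;

// Example pipeline using the generated create*Pass() constructors that this
// integrate switches to; pass options take their defaults when none are given.
static void buildExampleTosaPipeline(OpPassManager &pm) {
  pm.addNestedPass<func::FuncOp>(createTosaToSCFPass());
  pm.addNestedPass<func::FuncOp>(createTosaToArithPass());
  pm.addNestedPass<func::FuncOp>(createTosaToTensorPass());
}
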
compiler/plugins/input/TOSA/PluginRegistration.cpp (2 additions & 2 deletions)
@@ -28,9 +28,9 @@ struct TOSASession
PluginActivationPolicy::DefaultActivated> {
static void registerPasses() {
registerTOSAConversionPasses();
- registerTosaToArith();
+ registerTosaToArithPass();
registerTosaToLinalg();
- registerTosaToTensor();
+ registerTosaToTensorPass();
}

void onRegisterDialects(DialectRegistry &registry) override {
@@ -19,5 +19,5 @@ module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
}
}
// CHECK-LABEL: util.func public @lhs_encoding
- // CHECK: tensor.pack
- // CHECK: tensor.unpack
+ // CHECK: linalg.pack
+ // CHECK: linalg.unpack
Comment on lines -22 to +23
@ScottTodd (Member) commented on Feb 19, 2025:

Can you please call out large changes like these in the PR description with some context? A link to the upstream commit (llvm/llvm-project@517800e, https://github.com/llvm/llvm-project/commit/517800e37e8d3a4ee84214bef65e227612c2a98b) would be ideal.

We don't have a PR template for LLVM integrates but here's what we usually look for:

The PR author (Contributor) replied:

Ahh, I didn't know you can save a click by putting llvm/llvm-project@3e61c1ab7f in the title. Thanks for the tip.

I also updated the PR title and the description.
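
The rest of the diff tracks llvm/llvm-project@517800e, which moved PackOp and UnPackOp from the tensor dialect into the linalg dialect. For downstream code the migration is a namespace swap; the builder takes the same operands and attributes as before. A minimal sketch of the post-move spelling (the helper and its parameters are placeholders for illustration, not code from this PR):

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/IR/PatternMatch.h"

using namespace mlir;

// Hypothetical helper: builds a pack op under its new home in linalg.
// Pre-integrate, the only difference would be tensor::PackOp here.
static Value buildPack(PatternRewriter &rewriter, Location loc, Value source,
                       Value dest, ArrayRef<int64_t> innerDimsPos,
                       ArrayRef<OpFoldResult> mixedTiles, Value paddingValue,
                       ArrayRef<int64_t> outerDimsPerm) {
  return rewriter
      .create<linalg::PackOp>(loc, source, dest, innerDimsPos, mixedTiles,
                              paddingValue, outerDimsPerm)
      .getResult();
}
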

@@ -263,7 +263,7 @@ matchDAGForUKernel(RewriterBase &rewriter, linalg::Mmt4DOp op,
}

static FailureOr<IREE::Codegen::UKernelOpInterface>
- matchDAGForUKernel(RewriterBase &rewriter, tensor::PackOp op,
+ matchDAGForUKernel(RewriterBase &rewriter, linalg::PackOp op,
bool /*skipIntermediateRoundings*/) {
auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(op);
const char ukernelName[] = "pack";
@@ -386,7 +386,7 @@ matchDAGForUKernel(RewriterBase &rewriter, tensor::PackOp op,
}

static FailureOr<IREE::Codegen::UKernelOpInterface>
- matchDAGForUKernel(RewriterBase &rewriter, tensor::UnPackOp op,
+ matchDAGForUKernel(RewriterBase &rewriter, linalg::UnPackOp op,
bool /*skipIntermediateRoundings*/) {
auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(op);
const char ukernelName[] = "unpack";
@@ -616,8 +616,8 @@ void CPULowerToUKernelsPass::runOnOperation() {
// these ops.
auto allTargets = [](auto target) { return true; };
patterns.insert<LowerToUKernelPattern<linalg::Mmt4DOp>,
- LowerToUKernelPattern<tensor::PackOp>,
- LowerToUKernelPattern<tensor::UnPackOp>>(
+ LowerToUKernelPattern<linalg::PackOp>,
+ LowerToUKernelPattern<linalg::UnPackOp>>(
context, allTargets, skipIntermediateRoundings);
// These patterns are inherently specific to the VMVX backend.
patterns.insert<LowerToUKernelPattern<IREE::Codegen::QueryTileSizesOp>>(
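
Note how little this file changes: the ukernel lowering picks a matcher by C++ overload on the op type, so retargeting pack/unpack from tensor to linalg only touches the overload signatures and the template arguments in the pattern list; the DAG matching itself is untouched. A condensed sketch of that dispatch shape, with the matcher bodies elided and the target-filtering predicate from the real pass omitted (an assumption-laden sketch, not IREE's exact pattern; includes and the IREE type declarations are taken as given):

template <typename OpType>
struct LowerToUKernelPattern : OpRewritePattern<OpType> {
  LowerToUKernelPattern(MLIRContext *ctx, bool skipIntermediateRoundings)
      : OpRewritePattern<OpType>(ctx),
        skipIntermediateRoundings(skipIntermediateRoundings) {}

  LogicalResult matchAndRewrite(OpType op,
                                PatternRewriter &rewriter) const override {
    // Overload resolution statically selects the matcher for OpType, e.g.
    // the linalg::PackOp overload above, which emits the "pack" ukernel.
    FailureOr<IREE::Codegen::UKernelOpInterface> ukernelOp =
        matchDAGForUKernel(rewriter, op, skipIntermediateRoundings);
    if (failed(ukernelOp))
      return rewriter.notifyMatchFailure(op, "no matching ukernel");
    rewriter.replaceOp(op, ukernelOp->getOperation()->getResults());
    return success();
  }

  bool skipIntermediateRoundings;
};
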
@@ -46,7 +46,7 @@ static void tileBatchDimsForBatchMmt4dOp(RewriterBase &rewriter,

static void tileNonPackedDimsFor3DPackOps(RewriterBase &rewriter,
FunctionOpInterface funcOp) {
- funcOp.walk([&](tensor::PackOp packOp) {
+ funcOp.walk([&](linalg::PackOp packOp) {
if (packOp.getSourceRank() != 3 || packOp.getDestRank() != 5) {
return;
}
@@ -81,7 +81,7 @@ static void tileNonPackedDimsFor3DPackOps(RewriterBase &rewriter,

static void tileNonPackedDimsFor5DPUnpackOps(RewriterBase &rewriter,
FunctionOpInterface funcOp) {
- funcOp.walk([&](tensor::UnPackOp unpackOp) {
+ funcOp.walk([&](linalg::UnPackOp unpackOp) {
if (unpackOp.getSourceRank() != 5 || unpackOp.getDestRank() != 3) {
return;
}
@@ -251,10 +251,10 @@ struct ConvertBatchMmt4DtoMmt4DPattern
}
};

- struct Convert3DPackto2DPackPattern : public OpRewritePattern<tensor::PackOp> {
- using OpRewritePattern<tensor::PackOp>::OpRewritePattern;
+ struct Convert3DPackto2DPackPattern : public OpRewritePattern<linalg::PackOp> {
+ using OpRewritePattern<linalg::PackOp>::OpRewritePattern;

- LogicalResult matchAndRewrite(tensor::PackOp packOp,
+ LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override {
if (packOp.getSourceRank() != 3 || packOp.getDestRank() != 5) {
return failure();
@@ -309,7 +309,7 @@ struct Convert3DPackto2DPackPattern : public OpRewritePattern<tensor::PackOp> {
auto reducedDest = tensor::createCanonicalRankReducingExtractSliceOp(
rewriter, loc, packOp.getDest(), reducedDestType);

- auto newPackOp = rewriter.create<tensor::PackOp>(
+ auto newPackOp = rewriter.create<linalg::PackOp>(
loc, reducedSrc, reducedDest, newInnerDimsPos, packOp.getMixedTiles(),
packOp.getPaddingValue(), newOuterDimsPerm);

@@ -321,10 +321,10 @@ struct Convert3DPackto2DPackPattern : public OpRewritePattern<tensor::PackOp> {
};

struct Convert5DUnPackto4DUnPackPattern
- : public OpRewritePattern<tensor::UnPackOp> {
- using OpRewritePattern<tensor::UnPackOp>::OpRewritePattern;
+ : public OpRewritePattern<linalg::UnPackOp> {
+ using OpRewritePattern<linalg::UnPackOp>::OpRewritePattern;

- LogicalResult matchAndRewrite(tensor::UnPackOp unpackOp,
+ LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp,
PatternRewriter &rewriter) const override {
if (unpackOp.getSourceRank() != 5 || unpackOp.getDestRank() != 3) {
return failure();
@@ -387,7 +387,7 @@ struct Convert5DUnPackto4DUnPackPattern
auto reducedDest = tensor::createCanonicalRankReducingExtractSliceOp(
rewriter, loc, unpackOp.getDest(), reducedDestType);

- auto newUnpackOp = rewriter.create<tensor::UnPackOp>(
+ auto newUnpackOp = rewriter.create<linalg::UnPackOp>(
loc, reducedSrc, reducedDest, newInnerDimsPos, unpackOp.getMixedTiles(),
newOuterDimsPerm);

@@ -436,8 +436,8 @@ void CPUPrepareUkernelsPass::runOnOperation() {
tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx);
tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, ctx);
tensor::EmptyOp::getCanonicalizationPatterns(patterns, ctx);
- tensor::PackOp::getCanonicalizationPatterns(patterns, ctx);
- tensor::UnPackOp::getCanonicalizationPatterns(patterns, ctx);
+ linalg::PackOp::getCanonicalizationPatterns(patterns, ctx);
+ linalg::UnPackOp::getCanonicalizationPatterns(patterns, ctx);
tensor::CastOp::getCanonicalizationPatterns(patterns, ctx);
tensor::populateFoldTensorEmptyPatterns(patterns);
if (failed(applyPatternsGreedily(funcOp, std::move(patterns)))) {
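
Both rewrites in this file follow the same recipe: when a rank-3 pack (or rank-5 unpack) carries a unit leading dimension, take canonical rank-reducing slices of the source and destination, re-emit the op one rank lower with every dimension index shifted down, and insert the result back into the original destination. A hedged sketch of the pack case under those assumptions (the real pattern also checks the unit-dim and permutation preconditions, which are elided here):

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"

using namespace mlir;

static LogicalResult rewrite3DPackAs2D(linalg::PackOp packOp,
                                       PatternRewriter &rewriter) {
  if (packOp.getSourceRank() != 3 || packOp.getDestRank() != 5)
    return failure();
  Location loc = packOp.getLoc();

  // Drop the (assumed unit) leading dimension from both operand types.
  auto srcType = cast<RankedTensorType>(packOp.getSourceType());
  auto destType = cast<RankedTensorType>(packOp.getDestType());
  auto reducedSrcType = RankedTensorType::get(srcType.getShape().drop_front(),
                                              srcType.getElementType());
  auto reducedDestType = RankedTensorType::get(
      destType.getShape().drop_front(), destType.getElementType());
  Value reducedSrc = tensor::createCanonicalRankReducingExtractSliceOp(
      rewriter, loc, packOp.getSource(), reducedSrcType);
  Value reducedDest = tensor::createCanonicalRankReducingExtractSliceOp(
      rewriter, loc, packOp.getDest(), reducedDestType);

  // Dropping the leading dim shifts every remaining dim index down by one.
  SmallVector<int64_t> newInnerDimsPos, newOuterDimsPerm;
  for (int64_t pos : packOp.getInnerDimsPos())
    newInnerDimsPos.push_back(pos - 1);
  for (int64_t dim : packOp.getOuterDimsPerm())
    if (dim != 0)
      newOuterDimsPerm.push_back(dim - 1);

  auto newPackOp = rewriter.create<linalg::PackOp>(
      loc, reducedSrc, reducedDest, newInnerDimsPos, packOp.getMixedTiles(),
      packOp.getPaddingValue(), newOuterDimsPerm);

  // Expand the rank-reduced result back into the original 5-D destination.
  Value replacement = tensor::createCanonicalRankReducingInsertSliceOp(
      rewriter, loc, newPackOp.getResult(), packOp.getDest());
  rewriter.replaceOp(packOp, replacement);
  return success();
}
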
@@ -287,7 +287,7 @@ func.func @mmt4d_bf16bf16f32(%arg0 : tensor<?x?x16x2xbf16>, %arg1 : tensor<?x?x1
func.func @pack_i8i8_x86(%arg0 : tensor<?x?xi8>, %arg1 : tensor<?x?x7x8xi8>, %arg2 : i8) -> tensor<?x?x7x8xi8> attributes {
hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "all", target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}>
} {
- %result = tensor.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
+ %result = linalg.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
: tensor<?x?xi8> -> tensor<?x?x7x8xi8>
func.return %result : tensor<?x?x7x8xi8>
}
@@ -315,7 +315,7 @@ func.func @pack_i8i8_x86(%arg0 : tensor<?x?xi8>, %arg1 : tensor<?x?x7x8xi8>, %ar
func.func @pack_i8i8(%arg0 : tensor<?x?xi8>, %arg1 : tensor<?x?x7x8xi8>, %arg2 : i8) -> tensor<?x?x7x8xi8> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
+ %result = linalg.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
: tensor<?x?xi8> -> tensor<?x?x7x8xi8>
func.return %result : tensor<?x?x7x8xi8>
}
@@ -344,7 +344,7 @@ func.func @pack_i8i8(%arg0 : tensor<?x?xi8>, %arg1 : tensor<?x?x7x8xi8>, %arg2 :
func.func @pack_f16f16(%arg0 : tensor<?x?xf16>, %arg1 : tensor<?x?x7x8xf16>, %arg2 : f16) -> tensor<?x?x7x8xf16> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.pack %arg0 padding_value(%arg2 : f16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
+ %result = linalg.pack %arg0 padding_value(%arg2 : f16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
: tensor<?x?xf16> -> tensor<?x?x7x8xf16>
func.return %result : tensor<?x?x7x8xf16>
}
@@ -373,7 +373,7 @@ func.func @pack_f16f16(%arg0 : tensor<?x?xf16>, %arg1 : tensor<?x?x7x8xf16>, %ar
func.func @pack_bf16bf16(%arg0 : tensor<?x?xbf16>, %arg1 : tensor<?x?x7x8xbf16>, %arg2 : bf16) -> tensor<?x?x7x8xbf16> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.pack %arg0 padding_value(%arg2 : bf16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
+ %result = linalg.pack %arg0 padding_value(%arg2 : bf16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
: tensor<?x?xbf16> -> tensor<?x?x7x8xbf16>
func.return %result : tensor<?x?x7x8xbf16>
}
@@ -401,7 +401,7 @@ func.func @pack_bf16bf16(%arg0 : tensor<?x?xbf16>, %arg1 : tensor<?x?x7x8xbf16>,
func.func @pack_i32i32_transpose_inner(%arg0 : tensor<?x?xi32>, %arg1 : tensor<?x?x7x8xi32>, %arg2 : i32) -> tensor<?x?x7x8xi32> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.pack %arg0 padding_value(%arg2 : i32) inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
+ %result = linalg.pack %arg0 padding_value(%arg2 : i32) inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
: tensor<?x?xi32> -> tensor<?x?x7x8xi32>
func.return %result : tensor<?x?x7x8xi32>
}
@@ -430,19 +430,19 @@ func.func @pack_i32i32_transpose_inner(%arg0 : tensor<?x?xi32>, %arg1 : tensor<?
func.func @pack_f32f32_transpose_inner_and_outer(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?x7x8xf32>, %arg2 : f32) -> tensor<?x?x7x8xf32> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.pack %arg0 padding_value(%arg2 : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
+ %result = linalg.pack %arg0 padding_value(%arg2 : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
: tensor<?x?xf32> -> tensor<?x?x7x8xf32>
func.return %result : tensor<?x?x7x8xf32>
}

// -----

- // Check that tensor.pack is not lowered to a microkernel by default - it should
+ // Check that linalg.pack is not lowered to a microkernel by default - it should
// only be on VMVX.
// CHECK: func @unpack_f16f16_default
- // CHECK: tensor.unpack
+ // CHECK: linalg.unpack
func.func @unpack_f16f16_default(%arg0 : tensor<?x?x7x8xf16>, %arg1 : tensor<?x?xf16>) -> tensor<?x?xf16> {
- %result = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
+ %result = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
: tensor<?x?x7x8xf16> -> tensor<?x?xf16>
func.return %result : tensor<?x?xf16>
}
@@ -468,7 +468,7 @@ func.func @unpack_f16f16_default(%arg0 : tensor<?x?x7x8xf16>, %arg1 : tensor<?x?
func.func @unpack_f16f16(%arg0 : tensor<?x?x7x8xf16>, %arg1 : tensor<?x?xf16>) -> tensor<?x?xf16> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
+ %result = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1
: tensor<?x?x7x8xf16> -> tensor<?x?xf16>
func.return %result : tensor<?x?xf16>
}
@@ -494,7 +494,7 @@ func.func @unpack_f16f16(%arg0 : tensor<?x?x7x8xf16>, %arg1 : tensor<?x?xf16>) -
func.func @unpack_i32i32_transpose_inner(%arg0 : tensor<?x?x7x8xi32>, %arg1 : tensor<?x?xi32>) -> tensor<?x?xi32> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
+ %result = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
: tensor<?x?x7x8xi32> -> tensor<?x?xi32>
func.return %result : tensor<?x?xi32>
}
@@ -520,7 +520,7 @@ func.func @unpack_i32i32_transpose_inner(%arg0 : tensor<?x?x7x8xi32>, %arg1 : te
func.func @unpack_f32f32_transpose_inner_and_outer(%arg0 : tensor<?x?x7x8xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> attributes {
hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
} {
- %result = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
+ %result = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1
: tensor<?x?x7x8xf32> -> tensor<?x?xf32>
func.return %result : tensor<?x?xf32>
}
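
These tests pin down the gating policy: pack/unpack ukernel lowering is off by default and opted into per target, with ukernels = "all" in the hal.executable.target configuration enabling everything (VMVX in these tests). A hedged sketch of what such a gate can look like in the matcher; isUkernelEnabled and the "all"-versus-allowlist policy are illustrative assumptions, while the ExecutableTargetAttr lookup is the one used in the pass above (IREE includes and namespaces assumed):

using namespace mlir;
using namespace mlir::iree_compiler;

// Hypothetical gate: inspects the executable target configuration for a
// "ukernels" entry, mirroring attributes like {ukernels = "all"} above.
static bool isUkernelEnabled(Operation *op, StringRef ukernelName) {
  auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(op);
  if (!targetAttr)
    return false;
  DictionaryAttr cfg = targetAttr.getConfiguration();
  auto ukernels = cfg ? cfg.getAs<StringAttr>("ukernels") : StringAttr();
  if (!ukernels)
    return false;
  if (ukernels.getValue() == "all")
    return true;
  // Otherwise treat the value as a comma-separated allowlist, e.g. "pack,mmt4d".
  SmallVector<StringRef> enabled;
  ukernels.getValue().split(enabled, ',');
  return llvm::is_contained(enabled, ukernelName);
}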