Address review comments (part I)

sdasgup3 · sdasgup3 · commit b09432fcc475 · 2025-03-13T02:32:57.000Z
diff --git a/mlir/include/mlir/Dialect/Quant/Transforms/Passes.td b/mlir/include/mlir/Dialect/Quant/Transforms/Passes.td
@@ -31,7 +31,7 @@ def LowerQuantOps : Pass<"lower-quant-ops", "func::FuncOp"> {
   ];
 }
 
-def NormalizeQuantTypes : Pass<"normalize-quant-types"> {
+def NormalizeQuantTypes : Pass<"normalize-quant-types", "func::FuncOp"> {
   let summary = "Normalize generic quantized types to specific quantized types";
   let description = [{
     This pass converts generic quantized types in the `quant` dialect to more
@@ -60,6 +60,11 @@ def NormalizeQuantTypes : Pass<"normalize-quant-types"> {
           -> `!quant.uniform<i8:f32, 2.0>`
        * `tensor<?x?x!quant.uniform<i8:f32:{0:1, 0:4}, {{2.0}}>>`
           -> `tensor<?x?x!quant.uniform<i8:f32, 2.0>>`
+
+    The rationale for these conversions is that decomposing and handling the
+    more specific quantized types tends to be more efficient than treating
+    everything as a sub-channel type.
+
   }];
   let dependentDialects = ["func::FuncDialect", "quant::QuantDialect"];
 }
diff --git a/mlir/lib/Dialect/Quant/IR/QuantOps.cpp b/mlir/lib/Dialect/Quant/IR/QuantOps.cpp
@@ -167,16 +167,19 @@ LogicalResult verifyQuantizationOp(Operation *op, QuantizedType quantizedType,
     return op->emitError(
         "expressed type in quantized type expected to match float type");
 
-  // Veriy integrity of per-axis quantization information, if present.
+  // Verify integrity of per-axis quantization information, if present.
   if (auto quantizedPerAxisType =
           dyn_cast<UniformQuantizedPerAxisType>(quantizedType)) {
     return verifyPerAxisQuantization(op, quantizedPerAxisType, containerType);
-  } else if (auto quantizedSubChannelType =
-                 dyn_cast<UniformQuantizedSubChannelType>(quantizedType)) {
+  }
+
+  if (auto quantizedSubChannelType =
+          dyn_cast<UniformQuantizedSubChannelType>(quantizedType)) {
     return verifySubChannelQuantization(op, quantizedSubChannelType,
                                         containerType);
   }
 
+  // At this point the type is UniformQuantizedType.
   return success();
 }
 
@@ -268,14 +271,18 @@ LogicalResult StorageCastOp::verify() {
   // the quantization type may appear in the input or the result, their tensor
   // shapes are guaranteed to be identical at this point.
   if (auto quantizedPerAxisType =
-          dyn_cast<UniformQuantizedPerAxisType>(quantizedType))
+          dyn_cast<UniformQuantizedPerAxisType>(quantizedType)) {
     return verifyPerAxisQuantization(*this, quantizedPerAxisType,
                                      getInput().getType());
-  else if (auto quantizedSunChannelType =
-               dyn_cast<UniformQuantizedSubChannelType>(quantizedType))
+  }
+
+  if (auto quantizedSunChannelType =
+          dyn_cast<UniformQuantizedSubChannelType>(quantizedType)) {
     return verifySubChannelQuantization(*this, quantizedSunChannelType,
                                         getInput().getType());
+  }
 
+  // At this point the type is UniformQuantizedType.
   return success();
 }
 
diff --git a/mlir/lib/Dialect/Quant/IR/TypeParser.cpp b/mlir/lib/Dialect/Quant/IR/TypeParser.cpp
@@ -518,12 +518,10 @@ static void
 printBlockSizeInfo(ArrayRef<std::pair<int32_t, int64_t>> blockSizeInfo,
                    DialectAsmPrinter &out) {
   out << "{";
-  llvm::interleave(
-      llvm::seq<size_t>(0, blockSizeInfo.size()), out,
-      [&](size_t index) {
+  llvm::interleaveComma(
+      llvm::seq<size_t>(0, blockSizeInfo.size()), out, [&](size_t index) {
         out << blockSizeInfo[index].first << ":" << blockSizeInfo[index].second;
-      },
-      ",");
+      });
   out << "}";
 }
 
@@ -593,7 +591,7 @@ void printDenseQuantizationParameters(ArrayRef<APFloat> scales,
   SmallVector<unsigned, 4> counter(rank, 0);
   unsigned openBrackets = 0;
 
-  auto bumpCounter = [&]() {
+  auto incrementCounterAndDelimit = [&]() {
     ++counter[rank - 1];
     for (unsigned i = rank - 1; i > 0; --i) {
       if (counter[i] >= shape[i]) {
@@ -605,7 +603,7 @@ void printDenseQuantizationParameters(ArrayRef<APFloat> scales,
     }
   };
 
-  for (unsigned idx = 0, e = scales.size(); idx != e; ++idx) {
+  for (unsigned idx = 0, e = scales.size(); idx < e; ++idx) {
     if (idx != 0)
       out << ", ";
     while (openBrackets++ < rank)
@@ -615,7 +613,7 @@ void printDenseQuantizationParameters(ArrayRef<APFloat> scales,
     if (zeroPoints[idx] != 0) {
       out << ":" << zeroPoints[idx];
     }
-    bumpCounter();
+    incrementCounterAndDelimit();
   }
   while (openBrackets-- > 0)
     out << '}';
diff --git a/mlir/lib/Dialect/Quant/Transforms/NormalizeQuantTypes.cpp b/mlir/lib/Dialect/Quant/Transforms/NormalizeQuantTypes.cpp
@@ -145,7 +145,6 @@ class NormalizeQuantTypes
 public:
   void runOnOperation() override {
 
-    auto moduleOp = cast<ModuleOp>(getOperation());
     auto *context = &getContext();
 
     NormalizedQuantTypesConverter typeConverter;
@@ -168,7 +167,8 @@ class NormalizeQuantTypes
     patterns.add<ConvertGenericOpwithSubChannelType>(typeConverter, context);
 
     // Apply conversion
-    if (failed(applyFullConversion(moduleOp, target, std::move(patterns))))
+    if (failed(
+            applyFullConversion(getOperation(), target, std::move(patterns))))
       signalPassFailure();
   }
 };
diff --git a/mlir/test/CAPI/quant.c b/mlir/test/CAPI/quant.c
@@ -210,7 +210,7 @@ void testUniformSubChannelType(MlirContext ctx) {
 
   MlirType subChannelParsed =
       mlirTypeParseGet(ctx, mlirStringRefCreateFromCString(
-                                "!quant.uniform<i8:f32:{0:1,1:2}, "
+                                "!quant.uniform<i8:f32:{0:1, 1:2}, "
                                 "{{2.0:10, 3.0:20}, {4.0:30, 5.0:40}}>"));
 
   MlirType i8 = mlirIntegerTypeGet(ctx, 8);
@@ -321,7 +321,7 @@ void testUniformSubChannelType(MlirContext ctx) {
   // CHECK: equal: 1
   fprintf(stderr, "equal: %d\n", mlirTypeEqual(subChannel, subChannelParsed));
 
-  // CHECK: !quant.uniform<i8:f32:{0:1,1:2},
+  // CHECK: !quant.uniform<i8:f32:{0:1, 1:2},
   // {{.*}}2.000000e+00:10, 3.000000e+00:20},
   // {4.000000e+00:30, 5.000000e+00:40{{.*}}}}>
   mlirTypeDump(subChannel);
diff --git a/mlir/test/Dialect/Quant/Bytecode/types.mlir b/mlir/test/Dialect/Quant/Bytecode/types.mlir
@@ -70,6 +70,6 @@ module @parseUniformPerAxisMixed attributes {
 
 // CHECK-LABEL: parseUniformSubChannel
 module @parseUniformSubChannel attributes {
-  // CHECK: !quant.uniform<i8:f32:{0:1,1:2}, {{\{}}{2.000000e+00:10, 3.000000e+00:20}, {4.000000e+00:30, 5.000000e+00:40}}>
+  // CHECK: !quant.uniform<i8:f32:{0:1, 1:2}, {{\{}}{2.000000e+00:10, 3.000000e+00:20}, {4.000000e+00:30, 5.000000e+00:40}}>
   bytecode.test = !quant.uniform<i8:f32:{0:1, 1:2}, {{2.0:10, 3.0:20}, {4.0:30, 5.0:40}}>
 } {}
diff --git a/mlir/test/Dialect/Quant/lower-quant-ops.mlir b/mlir/test/Dialect/Quant/lower-quant-ops.mlir
@@ -535,7 +535,7 @@ func.func @qcast_per_channel_unranked(%arg0: tensor<*xf32>) -> tensor<*x!qalias>
 // CHECK:   linalg.yield %[[STORED_INT]] : i8
 // CHECK: } -> tensor<2x?x?x4xi8>
 
-// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[GENERIC]] : tensor<2x?x?x4xi8> to tensor<2x?x?x4x!quant.uniform<i8:f32:{0:1,3:2}, {{.*}}2.000000e+00:10, 3.000000e+00:20{{.*}}, {{.*}}4.000000e+00:30, 5.000000e+00:40{{.*}}>>
+// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[GENERIC]] : tensor<2x?x?x4xi8> to tensor<2x?x?x4x!quant.uniform<i8:f32:{0:1, 3:2}, {{.*}}2.000000e+00:10, 3.000000e+00:20{{.*}}, {{.*}}4.000000e+00:30, 5.000000e+00:40{{.*}}>>
 // CHECK: return %[[STORED_QUANT]]
 
 !qalias = !quant.uniform<i8:f32:{0:1, 3:2}, {{{{2.0:10, 3.0:20}}}, {{{4.0:30, 5.0:40}}}}>
@@ -565,7 +565,7 @@ func.func @qcast_sub_channel_ranked(%arg0: tensor<2x?x?x4xf32>) -> tensor<2x?x?x
 // CHECK:   linalg.yield %[[STORED_INT]] : i8
 // CHECK: } -> tensor<2x3x5x4xi8>
 
-// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[GENERIC]] : tensor<2x3x5x4xi8> to tensor<2x3x5x4x!quant.uniform<i8:f32:{0:1,3:2}, {{.*}}2.000000e+00:10, 3.000000e+00:20{{.*}}, {{.*}}4.000000e+00:30, 5.000000e+00:40{{.*}}>>
+// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[GENERIC]] : tensor<2x3x5x4xi8> to tensor<2x3x5x4x!quant.uniform<i8:f32:{0:1, 3:2}, {{.*}}2.000000e+00:10, 3.000000e+00:20{{.*}}, {{.*}}4.000000e+00:30, 5.000000e+00:40{{.*}}>>
 // CHECK: return %[[STORED_QUANT]]
 
 !qalias = !quant.uniform<i8:f32:{0:1, 3:2}, {{{{2.0:10, 3.0:20}}}, {{{4.0:30, 5.0:40}}}}>
diff --git a/mlir/test/Dialect/Quant/parse-uniform.mlir b/mlir/test/Dialect/Quant/parse-uniform.mlir
@@ -157,8 +157,8 @@ func.func @parse() -> !qalias {
 
 // -----
 // Sub-channel scales and zero points (mixed affine and fixedpoint)
-// CHECK: !quant.uniform<u8:f32:{0:1,1:2}, {{\{}}{2.000000e+00:120, 3.000000e+00:127}, {4.000000e+00, 5.000000e+00}}>
-!qalias = !quant.uniform<u8:f32:{0:1,1:2}, {{2.0:120,3.0:127}, {4.0,5.0}}>
+// CHECK: !quant.uniform<u8:f32:{0:1, 1:2}, {{\{}}{2.000000e+00:120, 3.000000e+00:127}, {4.000000e+00, 5.000000e+00}}>
+!qalias = !quant.uniform<u8:f32:{0:1, 1:2}, {{2.0:120,3.0:127}, {4.0,5.0}}>
 func.func @parse() -> !qalias {
   %0 = "foo"() : () -> !qalias
   return %0 : !qalias