Skip to content

Commit ce179a5

Browse files
committed
Add pass to normalize generic quantized types to specific quantized types
1 parent a6426bb commit ce179a5

File tree

4 files changed

+264
-0
lines changed

4 files changed

+264
-0
lines changed

mlir/include/mlir/Dialect/Quant/Transforms/Passes.td

+33
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,39 @@ def LowerQuantOps : Pass<"lower-quant-ops", "func::FuncOp"> {
3131
];
3232
}
3333

34+
def NormalizeQuantTypes : Pass<"normalize-quant-types"> {
  let summary = "Normalize generic quantized types to specific quantized types";
  let description = [{
    This pass converts generic quantized types in the `quant` dialect to more
    specific types when possible.

    The following conversions are performed:

    1. Sub-channel to per-axis: If the shape of the scales tensor of a
       sub-channel quantized type has exactly one dimension different from
       one (all other dimensions are one), it is converted to a per-axis
       quantized type along that dimension.

       For example:

       * `!quant.uniform<i8:f32:{0:1}, {{2.0}, {3.0}}>`
         -> `!quant.uniform<i8:f32:0, {2.0, 3.0}>`
       * `tensor<?x?x!quant.uniform<i8:f32:{0:1,1:4}, {{2.0}, {3.0}}>>`
         -> `tensor<?x?x!quant.uniform<i8:f32:0, {2.0, 3.0}>>`

    2. Sub-channel to per-tensor: If a sub-channel quantized type has only
       one scale or zero-point, it is converted to a per-tensor
       quantized type.

       For example:

       * `!quant.uniform<i8:f32:{}, {{2.0}}>`
         -> `!quant.uniform<i8:f32, 2.0>`
       * `tensor<?x?x!quant.uniform<i8:f32:{0:1,1:4}, {{2.0}}>>`
         -> `tensor<?x?x!quant.uniform<i8:f32, 2.0>>`
  }];
  let dependentDialects = ["func::FuncDialect", "quant::QuantDialect"];
}
66+
3467
def StripFuncQuantTypes : Pass<"strip-func-quant-types"> {
3568
let summary = "Strip quantized types from function headers";
3669
let description = [{

mlir/lib/Dialect/Quant/Transforms/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
add_mlir_dialect_library(MLIRQuantTransforms
22
LowerQuantOps.cpp
3+
NormalizeQuantTypes.cpp
34
StripFuncQuantTypes.cpp
45

56
ADDITIONAL_HEADER_DIRS
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
//===- NormalizeQuantTypes.cpp - Normalize quantized types ----------------===//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// Normalize generic quantized types to specific quantized types
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "mlir/Dialect/Func/IR/FuncOps.h"
15+
#include "mlir/Dialect/Func/Transforms/FuncConversions.h"
16+
#include "mlir/Dialect/Quant/IR/Quant.h"
17+
#include "mlir/Dialect/Quant/IR/QuantTypes.h"
18+
#include "mlir/Dialect/Quant/Transforms/Passes.h"
19+
#include "mlir/Dialect/Tensor/IR/Tensor.h"
20+
#include "mlir/Transforms/DialectConversion.h"
21+
22+
namespace mlir {
23+
namespace quant {
24+
25+
#define GEN_PASS_DEF_NORMALIZEQUANTTYPES
26+
#include "mlir/Dialect/Quant/Transforms/Passes.h.inc"
27+
28+
namespace {
29+
30+
/// Returns true if the given sub-channel quantized type is convertible to a
31+
/// per-tensor quantized type. This is true if the sub-channel type has only
32+
/// one scale and one zero point.
33+
///
34+
/// Assumes that `tensorType` is a tensor with element type
35+
/// `quant::UniformQuantizedSubChannelType`.
36+
static bool isConvertibleToPerTensor(TensorType tensorType) {
37+
return cast<UniformQuantizedSubChannelType>(tensorType.getElementType())
38+
.getScales()
39+
.getType()
40+
.getNumElements() == 1;
41+
}
42+
43+
/// Returns true if the given sub-channel quantized type is convertible to a
44+
/// per-axis quantized type. This is true if the shape of the scales tensor has
45+
/// all but one non-one value.
46+
///
47+
/// Assumes that `tensorType` is a tensor with element type
48+
/// `quant::UniformQuantizedSubChannelType`.
49+
static bool isConvertibleToPerAxis(TensorType tensorType) {
50+
auto shape = cast<UniformQuantizedSubChannelType>(tensorType.getElementType())
51+
.getScales()
52+
.getType()
53+
.getShape();
54+
return llvm::count_if(shape, [](int64_t dim) { return dim != 1; }) == 1;
55+
}
56+
57+
/// This class defines a type converter that converts sub-channel quantized
58+
/// types to per-tensor or per-axis quantized types whenever possible.
59+
class NormalizedQuantTypesConverter : public TypeConverter {
60+
61+
static Type convertType(Type type) {
62+
auto tensorType = dyn_cast<TensorType>(type);
63+
if (!tensorType) {
64+
return type;
65+
}
66+
67+
auto subChannelType =
68+
dyn_cast<UniformQuantizedSubChannelType>(tensorType.getElementType());
69+
if (!subChannelType) {
70+
return type;
71+
}
72+
73+
if (isConvertibleToPerTensor(tensorType)) {
74+
double scale =
75+
subChannelType.getScales().getValues<APFloat>()[0].convertToDouble();
76+
int64_t zeroPoint =
77+
subChannelType.getZeroPoints().getValues<APInt>()[0].getSExtValue();
78+
auto perTensorType = UniformQuantizedType::get(
79+
subChannelType.getFlags(), subChannelType.getStorageType(),
80+
subChannelType.getExpressedType(), scale, zeroPoint,
81+
subChannelType.getStorageTypeMin(),
82+
subChannelType.getStorageTypeMax());
83+
return tensorType.clone(perTensorType);
84+
}
85+
86+
if (isConvertibleToPerAxis(tensorType)) {
87+
auto shape = subChannelType.getScales().getType().getShape();
88+
auto quantizedDimItr =
89+
llvm::find_if(shape, [](int64_t dim) { return dim != 1; });
90+
auto scales = llvm::to_vector(llvm::map_range(
91+
subChannelType.getScales().getValues<APFloat>(),
92+
[](APFloat scale) { return scale.convertToDouble(); }));
93+
auto zeroPoints = llvm::to_vector(llvm::map_range(
94+
subChannelType.getZeroPoints().getValues<APInt>(),
95+
[](APInt zeroPoint) { return zeroPoint.getSExtValue(); }));
96+
auto perAxisType = UniformQuantizedPerAxisType::get(
97+
subChannelType.getFlags(), subChannelType.getStorageType(),
98+
subChannelType.getExpressedType(), scales, zeroPoints,
99+
quantizedDimItr - shape.begin(), subChannelType.getStorageTypeMin(),
100+
subChannelType.getStorageTypeMax());
101+
return tensorType.clone(perAxisType);
102+
}
103+
return type;
104+
}
105+
106+
public:
107+
explicit NormalizedQuantTypesConverter() { addConversion(convertType); }
108+
};
109+
110+
/// This class implements a conversion pattern that converts any generic
111+
/// operation with sub-channel quantized types to an equivalent operation with
112+
/// per-tensor or per-axis quantized types.
113+
class ConvertGenericOpwithSubChannelType : public ConversionPattern {
114+
public:
115+
ConvertGenericOpwithSubChannelType(TypeConverter &typeConverter,
116+
MLIRContext *context)
117+
: ConversionPattern(typeConverter, MatchAnyOpTypeTag{}, 0, context) {}
118+
119+
LogicalResult
120+
matchAndRewrite(Operation *op, ArrayRef<Value> operands,
121+
ConversionPatternRewriter &rewriter) const final {
122+
SmallVector<Type> resultTypes;
123+
if (failed(typeConverter->convertTypes(op->getResultTypes(), resultTypes)))
124+
return failure();
125+
126+
auto *newOp = Operation::create(
127+
op->getLoc(), op->getName(), resultTypes, operands, op->getAttrs(),
128+
op->getPropertiesStorage(), op->getSuccessors(), op->getNumRegions());
129+
for (auto regions : llvm::zip(op->getRegions(), newOp->getRegions())) {
130+
Region &before = std::get<0>(regions);
131+
Region &parent = std::get<1>(regions);
132+
rewriter.inlineRegionBefore(before, parent, parent.end());
133+
if (failed(rewriter.convertRegionTypes(&parent, *typeConverter)))
134+
return failure();
135+
}
136+
rewriter.insert(newOp);
137+
rewriter.replaceOp(op, newOp->getResults());
138+
return success();
139+
}
140+
};
141+
142+
// Conversion pass
143+
class NormalizeQuantTypes
144+
: public impl::NormalizeQuantTypesBase<NormalizeQuantTypes> {
145+
public:
146+
void runOnOperation() override {
147+
148+
auto moduleOp = cast<ModuleOp>(getOperation());
149+
auto *context = &getContext();
150+
151+
NormalizedQuantTypesConverter typeConverter;
152+
ConversionTarget target(*context);
153+
154+
// Determine legal operations.
155+
target.addDynamicallyLegalOp<func::FuncOp>([&](func::FuncOp op) {
156+
return typeConverter.isSignatureLegal(op.getFunctionType()) &&
157+
typeConverter.isLegal(&op.getBody());
158+
});
159+
target.markUnknownOpDynamicallyLegal([&](Operation *op) {
160+
return typeConverter.isLegal(op->getOperandTypes()) &&
161+
typeConverter.isLegal(op->getResultTypes());
162+
});
163+
164+
// Register conversion patterns
165+
RewritePatternSet patterns(context);
166+
populateFunctionOpInterfaceTypeConversionPattern<func::FuncOp>(
167+
patterns, typeConverter);
168+
patterns.add<ConvertGenericOpwithSubChannelType>(typeConverter, context);
169+
170+
// Apply conversion
171+
if (failed(applyFullConversion(moduleOp, target, std::move(patterns))))
172+
signalPassFailure();
173+
}
174+
};
175+
176+
} // namespace
177+
178+
} // namespace quant
179+
} // namespace mlir
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// RUN: mlir-opt %s --normalize-quant-types --split-input-file | FileCheck %s

// Case 1: sub-channel types whose scales tensor holds a single element are
// normalized to per-tensor quantized types, both in function signatures
// (callee declaration and caller definition) and in operation types.

// CHECK-LABEL: @callee(
// CHECK-SAME: [[PER_TENSOR:tensor<\?x\?x!quant.uniform<i8:f32, 2.000000e\+00:127>>]],
// CHECK-SAME: [[PER_TENSOR]]
// CHECK-SAME: ([[PER_TENSOR]], [[PER_TENSOR]])
// CHECK-LABEL: @normalize_quant_types_to_per_tensor
// CHECK-SAME: %[[ARG_0:.*]]: [[PER_TENSOR:tensor<\?x\?x!quant.uniform<i8:f32, 2.000000e\+00:127>>]],
// CHECK-SAME: %[[ARG_1:.*]]: [[PER_TENSOR]]
// CHECK-SAME: ([[PER_TENSOR]], [[PER_TENSOR]])
// CHECK: %[[TEMP_0:.*]] = "test.custom_op"(%[[ARG_0]]) : ([[PER_TENSOR]]) -> [[PER_TENSOR]]
// CHECK: %[[TEMP_1:.*]] = "test.custom_op"(%[[ARG_1]]) : ([[PER_TENSOR]]) -> [[PER_TENSOR]]
// CHECK: %[[TEMP_3:.*]]:2 = call @callee(%[[TEMP_0]], %[[TEMP_1]])
// CHECK: return %[[TEMP_3]]#0, %[[TEMP_3]]#1 : [[PER_TENSOR]], [[PER_TENSOR]]

!qalias1 = !quant.uniform<i8:f32:{}, {{2.0:127}}>
!qalias2 = !quant.uniform<i8:f32:{0:1,1:4}, {{2.0:127}}>

func.func private @callee(tensor<?x?x!qalias1>, tensor<?x?x!qalias2>) -> (tensor<?x?x!qalias1>, tensor<?x?x!qalias2>)

func.func @normalize_quant_types_to_per_tensor(%arg0: tensor<?x?x!qalias1>,
    %arg1: tensor<?x?x!qalias2>) -> (tensor<?x?x!qalias1>, tensor<?x?x!qalias2>) {
  %0 = "test.custom_op"(%arg0) : (tensor<?x?x!qalias1>) -> tensor<?x?x!qalias1>
  %1 = "test.custom_op"(%arg1) : (tensor<?x?x!qalias2>) -> tensor<?x?x!qalias2>
  %3:2 = func.call @callee(%0, %1) : (tensor<?x?x!qalias1>, tensor<?x?x!qalias2>) -> (tensor<?x?x!qalias1>, tensor<?x?x!qalias2>)
  return %3#0, %3#1 : tensor<?x?x!qalias1>, tensor<?x?x!qalias2>
}

// -----

// Case 2: sub-channel types whose scales tensor has exactly one non-unit
// dimension are normalized to per-axis quantized types along that dimension.

// CHECK-LABEL: @normalize_quant_types_to_per_axis
// CHECK-SAME: %[[ARG_0:.*]]: [[PER_AXIS:tensor<\?x\?x!quant.uniform<i8:f32:0, \{2.000000e\+00:127,3.000000e\+00:127\}>>]],
// CHECK-SAME: %[[ARG_1:.*]]: [[PER_AXIS]]
// CHECK-SAME: ([[PER_AXIS]], [[PER_AXIS]])
// CHECK: %[[TEMP_0:.*]] = "test.custom_op"(%[[ARG_0]]) : ([[PER_AXIS]]) -> [[PER_AXIS]]
// CHECK: %[[TEMP_1:.*]] = "test.custom_op"(%[[ARG_1]]) : ([[PER_AXIS]]) -> [[PER_AXIS]]
// CHECK: %[[TEMP_3:.*]]:2 = call @callee(%[[TEMP_0]], %[[TEMP_1]])
// CHECK: return %[[TEMP_3]]#0, %[[TEMP_3]]#1 : [[PER_AXIS]], [[PER_AXIS]]

!qalias1 = !quant.uniform<i8:f32:{0:1}, {{2.0:127}, {3.0:127}}>
!qalias2 = !quant.uniform<i8:f32:{0:1,1:4}, {{2.0:127}, {3.0:127}}>

func.func private @callee(tensor<?x?x!qalias1>, tensor<?x?x!qalias2>) -> (tensor<?x?x!qalias1>, tensor<?x?x!qalias2>)

func.func @normalize_quant_types_to_per_axis(%arg0: tensor<?x?x!qalias1>,
    %arg1: tensor<?x?x!qalias2>) -> (tensor<?x?x!qalias1>, tensor<?x?x!qalias2>) {
  %0 = "test.custom_op"(%arg0) : (tensor<?x?x!qalias1>) -> tensor<?x?x!qalias1>
  %1 = "test.custom_op"(%arg1) : (tensor<?x?x!qalias2>) -> tensor<?x?x!qalias2>
  %3:2 = func.call @callee(%0, %1) : (tensor<?x?x!qalias1>, tensor<?x?x!qalias2>) -> (tensor<?x?x!qalias1>, tensor<?x?x!qalias2>)
  return %3#0, %3#1 : tensor<?x?x!qalias1>, tensor<?x?x!qalias2>
}

0 commit comments

Comments
 (0)