ConvertQuantizeDequantize: add option to ignore consumers_count check

v-Golubev · v-Golubev · commit 1e758524dd5b · 2025-10-31T03:19:20.000+09:00
diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp b/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp
@@ -37,5 +37,6 @@ class ov::pass::ConvertQuantizeDequantize : public ov::pass::MatcherPass {
                                                                                          ov::element::u8,
                                                                                          ov::element::i16,
                                                                                          ov::element::u16},
-                              const ov::element::TypeVector& supported_original_precisions = {ov::element::f32});
+                              const ov::element::TypeVector& supported_original_precisions = {ov::element::f32},
+                              const bool ignore_consumers_count_check = false);
 };
diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp
@@ -72,7 +72,8 @@
 
 ov::pass::ConvertQuantizeDequantize::ConvertQuantizeDequantize(
     const ov::element::TypeVector& supported_low_precisions,
-    const ov::element::TypeVector& supported_original_precisions) {
+    const ov::element::TypeVector& supported_original_precisions,
+    const bool ignore_consumers_count_check) {
     MATCHER_SCOPE(ConvertQuantizeDequantize);
 
     using namespace ov::pass::pattern;
@@ -85,13 +86,18 @@ ov::pass::ConvertQuantizeDequantize::ConvertQuantizeDequantize(
     auto output_high_pattern = wrap_type<v0::Constant>();
     auto fq_pattern = wrap_type<v0::FakeQuantize>(
         {data_pattern, input_low_pattern, input_high_pattern, output_low_pattern, output_high_pattern});
-    auto convert1_pattern =
-        wrap_type<v0::Convert>({fq_pattern}, type_matches_any(supported_low_precisions) && consumers_count(1));
-    auto convert2_pattern =
-        wrap_type<v0::Convert>({convert1_pattern},
-                               type_matches_any(supported_original_precisions) && consumers_count(1));
+    op::Predicate convert1_predicate = ignore_consumers_count_check
+                                           ? type_matches_any(supported_low_precisions)
+                                           : type_matches_any(supported_low_precisions) && consumers_count(1);
+    auto convert1_pattern = wrap_type<v0::Convert>({fq_pattern}, convert1_predicate);
+    op::Predicate convert2_predicate = ignore_consumers_count_check
+                                           ? type_matches_any(supported_original_precisions)
+                                           : type_matches_any(supported_original_precisions) && consumers_count(1);
+    auto convert2_pattern = wrap_type<v0::Convert>({convert1_pattern}, convert2_predicate);
+
     auto zero_point_pattern = any_input();
-    auto sub_pattern = optional<v1::Subtract>({convert2_pattern, zero_point_pattern}, consumers_count(1));
+    op::Predicate sub_predicate = ignore_consumers_count_check ? op::Predicate() : consumers_count(1);
+    auto sub_pattern = optional<v1::Subtract>({convert2_pattern, zero_point_pattern}, sub_predicate);
     auto scale_pattern = any_input();
     auto mul_pattern = wrap_type<v1::Multiply>({sub_pattern, scale_pattern});
 
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -407,8 +407,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
             manager.register_pass<AddTransformation>();
             SERIALIZE_GRAPHS("add_transformation");
             // 2. Fuse FQ->Convert->DQ to a single FQ
-            manager.register_pass<ov::pass::ConvertQuantizeDequantize>(TypeVector{i16, u16, i32},
-                                                                       TypeVector{f16, f32});
+            manager.register_pass<ov::pass::ConvertQuantizeDequantize>(TypeVector{i16, u16, i32}, TypeVector{f16, f32}, true);
             SERIALIZE_GRAPHS("convert_qdq");
             // 3. Strip FQ layers with unsupported levels
             bool replace_with_clamp = true;