diff --git a/src/common/low_precision_transformations/include/low_precision/qdq_stripping.hpp b/src/common/low_precision_transformations/include/low_precision/qdq_stripping.hpp
new file mode 100644
index 00000000000000..318c0b8c6b5a26
--- /dev/null
+++ b/src/common/low_precision_transformations/include/low_precision/qdq_stripping.hpp
@@ -0,0 +1,31 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <set>
+
+#include "lpt_visibility.hpp"
+#include "openvino/pass/matcher_pass.hpp"
+#include "quantization_details.hpp"
+
+namespace ov {
+namespace pass {
+namespace low_precision {
+
+/**
+ * @ingroup ov_transformation_common_api
+ * @brief FQStrippingTransformation strips FakeQuantize operations with specified levels
+ * by replacing them with Clamp operations.
+ */
+class LP_TRANSFORMATIONS_API FQStrippingTransformation : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("FQStrippingTransformation", "0", MatcherPass);
+    FQStrippingTransformation(const std::set<size_t>& levels_to_strip, bool replace_with_clamp);
+};
+
+} // namespace low_precision
+} // namespace pass
+} // namespace ov
\ No newline at end of file
diff --git a/src/common/low_precision_transformations/src/low_precision.cpp b/src/common/low_precision_transformations/src/low_precision.cpp
index 5996f0332c968c..840bc30404e34a 100644
--- a/src/common/low_precision_transformations/src/low_precision.cpp
+++ b/src/common/low_precision_transformations/src/low_precision.cpp
@@ -323,7 +323,7 @@ bool LowPrecision::isFunctionQuantized(const std::shared_ptr<ov::Model>& m
         } else if (const auto multiSubGraph = ov::as_type_ptr<ov::op::util::MultiSubGraphOp>(node)) {
             // Look inside subraph operations, such as TensorIterator, Loop, If, etc
             for (size_t i = 0; i < multiSubGraph->get_internal_subgraphs_size(); i++) {
-                if (isFunctionQuantized(multiSubGraph->get_function(i))) {
+                if (isFunctionQuantized(multiSubGraph->get_function(i), supported_levels, check_fake_convert)) {
                     return true;
                 }
             }
diff --git a/src/common/low_precision_transformations/src/qdq_stripping.cpp b/src/common/low_precision_transformations/src/qdq_stripping.cpp
new file mode 100644
index 00000000000000..72434decce0473
--- /dev/null
+++ b/src/common/low_precision_transformations/src/qdq_stripping.cpp
@@ -0,0 +1,92 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/qdq_stripping.hpp"
+
+#include <memory>
+
+#include "itt.hpp"
+#include "low_precision/common/ie_lpt_exception.hpp"
+#include "low_precision/lpt_itt.hpp"
+#include "low_precision/network_helper.hpp"
+#include "openvino/core/except.hpp"
+#include "openvino/core/type.hpp"
+#include "openvino/op/clamp.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/equal.hpp"
+#include "openvino/op/fake_quantize.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "openvino/util/log.hpp"
+#include "transformations/utils/utils.hpp"
+
+namespace ov {
+namespace pass {
+namespace low_precision {
+
+FQStrippingTransformation::FQStrippingTransformation(const std::set<size_t>& levels_to_strip, bool replace_with_clamp) {
+    MATCHER_SCOPE(FQStrippingTransformation);
+    auto is_scalar = [](const Output<Node>& output) -> bool {
+        return ov::shape_size(output.get_shape()) == 1;
+    };
+    auto input_low_m = pattern::wrap_type<ov::op::v0::Constant>(is_scalar);
+    auto input_high_m = pattern::wrap_type<ov::op::v0::Constant>(is_scalar);
+    auto output_low_m = pattern::wrap_type<ov::op::v0::Constant>(is_scalar);
+    auto output_high_m = pattern::wrap_type<ov::op::v0::Constant>(is_scalar);
+    auto fq_m = pattern::wrap_type<ov::op::v0::FakeQuantize>(
+        {pattern::any_input(), input_low_m, input_high_m, output_low_m, output_high_m});
+
+    ov::graph_rewrite_callback callback = [OV_CAPTURE_CPY_AND_THIS](pattern::Matcher& m) {
+        const auto& pattern_map = m.get_pattern_value_map();
+        auto node = ov::as_type_ptr<ov::op::v0::FakeQuantize>(pattern_map.at(fq_m).get_node_shared_ptr());
+        if (!node) {
+            return false;
+        }
+
+        const size_t levels = node->get_levels();
+        if (!levels_to_strip.count(levels)) {
+            return false;
+        }
+
+        auto input = node->get_input_node_shared_ptr(0);
+        auto input_low = ov::as_type_ptr<ov::op::v0::Constant>(pattern_map.at(input_low_m).get_node_shared_ptr());
+        auto input_high = ov::as_type_ptr<ov::op::v0::Constant>(pattern_map.at(input_high_m).get_node_shared_ptr());
+        auto output_low = ov::as_type_ptr<ov::op::v0::Constant>(pattern_map.at(output_low_m).get_node_shared_ptr());
+        auto output_high = ov::as_type_ptr<ov::op::v0::Constant>(pattern_map.at(output_high_m).get_node_shared_ptr());
+
+        if (!input_low || !input_high || !output_low || !output_high) {
+            return false;
+        }
+        auto constants_are_equal = [](const std::shared_ptr<ov::op::v0::Constant>& lhs,
+                                      const std::shared_ptr<ov::op::v0::Constant>& rhs) -> bool {
+            auto equal =
+                ov::as_type_ptr<ov::op::v0::Constant>(ov::op::util::make_try_fold<ov::op::v1::Equal>(lhs, rhs));
+            OPENVINO_ASSERT(equal && ov::shape_size(equal->get_shape()) == 1,
+                            "constants_are_equal expects scalar constant as a comparison result");
+            return equal->get_vector<bool>()[0];
+        };
+
+        if (!constants_are_equal(input_low, output_low) || !constants_are_equal(input_high, output_high)) {
+            return false;
+        }
+
+        bool res = false;
+        if (replace_with_clamp) {
+            auto clamp = std::make_shared<ov::op::v0::Clamp>(input->output(0),
+                                                             output_low->cast_vector<double>()[0],
+                                                             output_high->cast_vector<double>()[0]);
+            res = replace_node_update_name(node, clamp);
+        } else {
+            res = replace_output_update_name(node->output(0), node->input_value(0));
+        }
+        OPENVINO_ASSERT(res, "FQ stripping failed");
+        return res;
+    };
+
+    auto m = std::make_shared<ov::pass::pattern::Matcher>(fq_m, matcher_name);
+    this->register_matcher(m, callback);
+}
+
+} // namespace low_precision
+} // namespace pass
+} // namespace ov
\ No newline at end of file
diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp b/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp
index a1d3841010278f..6a09c035305f78 100644
--- a/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp
@@ -7,6 +7,7 @@
 
 #include <memory>
 #include <vector>
 
+#include "openvino/core/type/element_type.hpp"
 #include "openvino/pass/matcher_pass.hpp"
 #include "transformations_visibility.hpp"
 
@@ -32,5 +33,10 @@ class TRANSFORMATIONS_API ConvertQuantizeDequantize;
 class ov::pass::ConvertQuantizeDequantize : public ov::pass::MatcherPass {
 public:
     OPENVINO_MATCHER_PASS_RTTI("ConvertQuantizeDequantize");
-    ConvertQuantizeDequantize();
+    ConvertQuantizeDequantize(const ov::element::TypeVector& supported_low_precisions = {ov::element::i8,
+                                                                                        ov::element::u8,
+                                                                                        ov::element::i16,
+                                                                                        ov::element::u16},
+                              const ov::element::TypeVector& supported_original_precisions = {ov::element::f32},
+                              const bool ignore_consumers_count_check = false);
 };
diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp
index 9f6f18f8f70cc6..e6ea1e477a71b6 100644
--- a/src/common/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp
+++ b/src/common/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp
@@ -16,6 +16,7 @@
 #include "openvino/op/fake_quantize.hpp"
 #include "openvino/op/multiply.hpp"
 #include "openvino/op/subtract.hpp"
+#include "openvino/pass/pattern/op/optional.hpp"
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "transformations/utils/utils.hpp"
 
@@ -62,100 +63,106 @@
 // v
 //
 
-ov::pass::ConvertQuantizeDequantize::ConvertQuantizeDequantize() {
+ov::pass::ConvertQuantizeDequantize::ConvertQuantizeDequantize(
+    const ov::element::TypeVector& supported_low_precisions,
+    const ov::element::TypeVector& supported_original_precisions,
+    const bool ignore_consumers_count_check) {
     MATCHER_SCOPE(ConvertQuantizeDequantize);
-    auto data_pattern = pass::pattern::any_input();
-    auto input_low_pattern = pass::pattern::any_input();
-    auto input_high_pattern = pass::pattern::any_input();
-    auto output_low_pattern = ov::pass::pattern::wrap_type<ov::op::v0::Constant>();
-    auto output_high_pattern = ov::pass::pattern::wrap_type<ov::op::v0::Constant>();
-    auto fq_pattern = ov::pass::pattern::wrap_type<ov::op::v0::FakeQuantize>(
+
+    using namespace ov::pass::pattern;
+    using namespace ov::op;
+
+    auto data_pattern = any_input(type_matches_any(supported_original_precisions));
+    auto input_low_pattern = any_input();
+    auto input_high_pattern = any_input();
+    auto output_low_pattern = wrap_type<v0::Constant>();
+    auto output_high_pattern = wrap_type<v0::Constant>();
+    auto fq_pattern = wrap_type<v0::FakeQuantize>(
         {data_pattern, input_low_pattern, input_high_pattern, output_low_pattern, output_high_pattern});
-    auto convert1_pattern = ov::pass::pattern::wrap_type<ov::op::v0::Convert>(
-        {fq_pattern},
-        pattern::type_matches_any({element::i8, element::u8, element::i16, element::u16}));
-    auto convert2_pattern =
-        ov::pass::pattern::wrap_type<ov::op::v0::Convert>({convert1_pattern}, pattern::type_matches(element::f32));
-    auto zero_point_pattern = pass::pattern::any_input();
-    auto sub_pattern = ov::pass::pattern::wrap_type<ov::op::v1::Subtract>({convert2_pattern, zero_point_pattern},
-                                                                          pattern::consumers_count(1));
-    auto scale_pattern = pass::pattern::any_input();
-    auto mul_pattern = ov::pass::pattern::wrap_type<ov::op::v1::Multiply>({sub_pattern, scale_pattern});
-
-    ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](pattern::Matcher& m) {
+    ov::pass::pattern::op::Predicate convert1_predicate =
+        ignore_consumers_count_check ? type_matches_any(supported_low_precisions)
+                                     : type_matches_any(supported_low_precisions) && consumers_count(1);
+    auto convert1_pattern = wrap_type<v0::Convert>({fq_pattern}, convert1_predicate);
+    ov::pass::pattern::op::Predicate convert2_predicate =
+        ignore_consumers_count_check ? type_matches_any(supported_original_precisions)
+                                     : type_matches_any(supported_original_precisions) && consumers_count(1);
+    auto convert2_pattern = wrap_type<v0::Convert>({convert1_pattern}, convert2_predicate);
+
+    auto zero_point_pattern = any_input();
+    ov::pass::pattern::op::Predicate sub_predicate =
+        ignore_consumers_count_check ? ov::pass::pattern::op::Predicate() : consumers_count(1);
+    auto sub_pattern = optional<v1::Subtract>({convert2_pattern, zero_point_pattern}, sub_predicate);
+    auto scale_pattern = any_input();
+    auto mul_pattern = wrap_type<v1::Multiply>({sub_pattern, scale_pattern});
+
+    ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](Matcher& m) {
         auto pattern_map = m.get_pattern_value_map();
 
         if (transformation_callback(m.get_match_root())) {
             return false;
         }
 
-        auto data = pattern_map[data_pattern];
-        auto input_low = pattern_map[input_low_pattern];
-        auto input_high = pattern_map[input_high_pattern];
-        auto output_low = ov::as_type_ptr<ov::op::v0::Constant>(pattern_map[output_low_pattern].get_node_shared_ptr());
+        auto data = pattern_map.at(data_pattern);
+        auto input_low = pattern_map.at(input_low_pattern);
+        auto input_high = pattern_map.at(input_high_pattern);
+        auto output_low =
+            ov::as_type_ptr<v0::Constant>(pattern_map.at(output_low_pattern).get_node_shared_ptr());
         if (!output_low)
             return false;
         auto output_high =
-            ov::as_type_ptr<ov::op::v0::Constant>(pattern_map[output_high_pattern].get_node_shared_ptr());
+            ov::as_type_ptr<v0::Constant>(pattern_map.at(output_high_pattern).get_node_shared_ptr());
         if (!output_high)
             return false;
-        auto fq = ov::as_type_ptr<ov::op::v0::FakeQuantize>(pattern_map[fq_pattern].get_node_shared_ptr());
+        auto fq = ov::as_type_ptr<v0::FakeQuantize>(pattern_map.at(fq_pattern).get_node_shared_ptr());
         if (!fq)
             return false;
-        auto zero_point = pattern_map[zero_point_pattern];
-        auto scale = pattern_map[scale_pattern];
-        auto convert1 = pattern_map[convert1_pattern];
-        auto convert2 = pattern_map[convert2_pattern];
-        auto mul = pattern_map[mul_pattern].get_node_shared_ptr();
-
-        // convert1 and convert2 should have only one input
-        if (convert1.get_target_inputs().size() != 1)
-            return false;
-        if (convert2.get_target_inputs().size() != 1)
-            return false;
-
-        // we support:
-        // i8 or u8: 'levels' attribute must be 256
-        // i16 or u16: 'levels' attribute must be 65536
-        size_t levels = fq->get_levels();
-        if (levels != 256 && levels != 65536)
+        auto scale = pattern_map.at(scale_pattern);
+        auto convert1 = pattern_map.at(convert1_pattern);
+        auto convert2 = pattern_map.at(convert2_pattern);
+        auto mul = pattern_map.at(mul_pattern).get_node_shared_ptr();
+
+        static const std::unordered_set<size_t> supported_levels{256, 65536};
+        const auto levels = fq->get_levels();
+        if (!supported_levels.count(levels))
             return false;
 
-        // check if (out_low_val, out_high_val) is (-128, 127) or (0, 255) or (-32768, 32767) or (0, 65535)
         float out_low_val;
-        if (!op::util::get_single_value(output_low, out_low_val))
+        if (!ov::op::util::get_single_value(output_low, out_low_val))
             return false;
         float out_high_val;
-        if (!op::util::get_single_value(output_high, out_high_val))
+        if (!ov::op::util::get_single_value(output_high, out_high_val))
            return false;
+
+#define PRECISION_LIMITS_FOR(type)                                                                   \
+    {ov::element::type}, {                                                                           \
+        static_cast<float>(std::numeric_limits<ov::fundamental_type_for<ov::element::type>>::min()), \
+        static_cast<float>(std::numeric_limits<ov::fundamental_type_for<ov::element::type>>::max())  \
+    }
+
+        static const std::unordered_map<ov::element::Type, std::pair<float, float>> supported_intervals{
+            {PRECISION_LIMITS_FOR(i8)},
+            {PRECISION_LIMITS_FOR(u8)},
+            {PRECISION_LIMITS_FOR(i16)},
+            {PRECISION_LIMITS_FOR(u16)}};
+#undef PRECISION_LIMITS_FOR
+
         const auto& type = convert1.get_element_type();
-        switch (type) {
-        case element::Type_t::i8:
-            if (out_low_val != -128 || out_high_val != 127)
-                return false;
-            break;
-        case element::Type_t::u8:
-            if (out_low_val != 0 || out_high_val != 255)
-                return false;
-            break;
-        case element::Type_t::i16:
-            if (out_low_val != -32768 || out_high_val != 32767)
-                return false;
-            break;
-        case element::Type_t::u16:
-            if (out_low_val != 0 || out_high_val != 65535)
-                return false;
-            break;
-        default:
+        // check if (out_low_val, out_high_val) pair is mapped on the expected precision ranges
+        auto interval_it = supported_intervals.find(type);
+        if (interval_it == supported_intervals.end() ||
+            interval_it->second != std::make_pair(out_low_val, out_high_val)) {
             return false;
         }
 
-        std::shared_ptr<Node> new_out_low =
-            std::make_shared<ov::op::v1::Multiply>(std::make_shared<ov::op::v1::Subtract>(output_low, zero_point),
-                                                   scale);
-        std::shared_ptr<Node> new_out_high =
-            std::make_shared<ov::op::v1::Multiply>(std::make_shared<ov::op::v1::Subtract>(output_high, zero_point),
-                                                   scale);
+        const bool has_zero_point = pattern_map.count(zero_point_pattern);
+        std::shared_ptr<Node> new_out_low = output_low, new_out_high = output_high;
+        if (has_zero_point) {
+            const auto& zero_point = pattern_map.at(zero_point_pattern);
+            new_out_low = std::make_shared<v1::Subtract>(new_out_low, zero_point);
+            new_out_high = std::make_shared<v1::Subtract>(new_out_high, zero_point);
+        }
+        new_out_low = std::make_shared<v1::Multiply>(new_out_low, scale);
+        new_out_high = std::make_shared<v1::Multiply>(new_out_high, scale);
 
         // check if new_out_low/high shapes are broadcastable to FQ's input
         auto data_shape = data.get_partial_shape();
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 6beeaa3a5081ea..74fb7e61831a21 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -24,6 +24,7 @@
 #include "low_precision/fold_convert.hpp"
 #include "low_precision/fuse_convert.hpp"
 #include "low_precision/group_convolution.hpp"
+#include "low_precision/qdq_stripping.hpp"
 #include "low_precision/low_precision.hpp"
 #include "low_precision/mat_mul.hpp"
 #include "low_precision/multiply_to_group_convolution.hpp"
@@ -387,8 +388,19 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
             ov::disable_keep_const_precision(node);
         }
 
-        auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func);
+        using namespace ov::pass::low_precision;
+        auto is_model_quantized = LowPrecision::isFunctionQuantized(func);
         enableInt8 = config.get_enable_lp_transformations() && is_model_quantized;
+        const auto enableQDQStripping = LowPrecision::isFunctionQuantized(func, std::set<levels>{levels::int16});
+        if (enableQDQStripping) {
+            using namespace ov::element;
+            // QDQ stripping pipeline
+            // 1. Fuse FQ->Convert->DQ to a single FQ
+            manager.register_pass<ov::pass::ConvertQuantizeDequantize>(TypeVector{i16, u16}, TypeVector{f32}, true);
+            // 2. Strip FQ layers with unsupported levels
+            manager.register_pass<FQStrippingTransformation>(std::set<size_t>{levels::int16}, false);
+            manager.register_pass<ov::pass::ConstantFolding>();
+        }
 
         manager.register_pass<ov::pass::MarkDequantization>(
             std::vector<ov::element::Type>{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 },
diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/qdq_stripping.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/qdq_stripping.cpp
new file mode 100644
index 00000000000000..5e204edd08303d
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/qdq_stripping.cpp
@@ -0,0 +1,155 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_test_utils/node_builders/constant.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/convert.hpp"
+#include "openvino/op/convolution.hpp"
+#include "openvino/op/fake_quantize.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/parameter.hpp"
+#include "openvino/op/subtract.hpp"
+#include "openvino/runtime/exec_model_info.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+
+namespace {
+using namespace ov::test;
+using ov::test::InputShape;
+
+using QDQStrippingParams = std::tuple<InputShape, ov::element::Type, ov::element::Type>;
+
+class QuantizationParams {
+public:
+    ov::Output<ov::Node> build_fq(const ov::Output<ov::Node>& input) const {
+        auto input_low = ov::op::v0::Constant::create(ov::element::f32, {}, {i_l});
+        auto input_high = ov::op::v0::Constant::create(ov::element::f32, {}, {i_h});
+        auto output_low = ov::op::v0::Constant::create(ov::element::f32, {}, {o_l});
+        auto output_high = ov::op::v0::Constant::create(ov::element::f32, {}, {o_h});
+        return std::make_shared<ov::op::v0::FakeQuantize>(input, input_low, input_high, output_low, output_high, 65536);
+    }
+
+    ov::Output<ov::Node> build_dq(const ov::Output<ov::Node>& input, const ov::element::Type& quantization_precision) const {
+        auto act_zero_point = ov::op::v0::Constant::create(quantization_precision, {}, {zero_point});
+        auto act_zp_convert = std::make_shared<ov::op::v0::Convert>(act_zero_point, ov::element::f32);
+
+        auto act_subtract = std::make_shared<ov::op::v1::Subtract>(input, act_zp_convert);
+        auto act_scale = ov::op::v0::Constant::create(ov::element::f32, {}, {(i_h - i_l) / (o_h - o_l)});
+
+        return std::make_shared<ov::op::v1::Multiply>(act_subtract, act_scale);
+    }
+
+    float i_l;
+    float i_h;
+    float o_l;
+    float o_h;
+    int zero_point;
+};
+
+class QDQStrippingTest : public testing::WithParamInterface<QDQStrippingParams>, virtual public ov::test::SubgraphBaseTest {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<QDQStrippingParams>& obj) {
+        const auto& [input_shape, input_precision, quantization_precision] = obj.param;
+        std::ostringstream result;
+        result << "input_shape=" << input_shape << "_input_precision=" << input_precision << "_quantization_precision=" << quantization_precision;
+        return result.str();
+    }
+
+protected:
+    std::shared_ptr<ov::Model> init_subgraph(const ov::PartialShape& input_shape, const ov::element::Type& quantization_precision) {
+        OPENVINO_ASSERT(quantization_precision == ov::element::i16 || quantization_precision == ov::element::u16,
+                        "Only i16 and u16 quantization precisions are supported in the test");
+        ov::ParameterVector params{std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape)};
+        // Note: these params are taken from the real cases
+        static const std::unordered_map<ov::element::Type_t, std::pair<QuantizationParams, QuantizationParams>> quantization_params{
+            {ov::element::Type_t::u16, {{0.f, 10.f, 0.f, 65535.f, 0}, {-6.244578838348389f, 6.347373962402344f, 0.f, 65535.f, 32500}}},
+            {ov::element::Type_t::i16,
+             {{-5.000076293945312f, 4.999923706054688f, -32768.f, 32767.f, 0}, {-6.296072483062744f, 6.295880317687988f, -32768.f, 32767.f, 0}}},
+        };
+
+        const auto& q_params = quantization_params.at(quantization_precision);
+        const auto& qp_1 = q_params.first;
+        auto input_fq = qp_1.build_fq(params[0]);
+
+        auto input_convert1 = std::make_shared<ov::op::v0::Convert>(input_fq, quantization_precision);
+        auto input_convert2 = std::make_shared<ov::op::v0::Convert>(input_convert1, ov::element::f32);
+
+        size_t seed = 1;
+        auto create_qdq_branch = [&](float weight_scale_value) {
+            auto input_dequantized = qp_1.build_dq(input_convert2, quantization_precision);
+            ov::test::utils::InputGenerateData weights_gen_data;
+            weights_gen_data.seed = seed;
+            auto weight_quantized = ov::test::utils::make_constant(ov::element::i8, ov::Shape{32, 3, 3, 3}, weights_gen_data);
+            auto weight_convert = std::make_shared<ov::op::v0::Convert>(weight_quantized, ov::element::f32);
+            auto weight_scale = ov::test::utils::make_constant(ov::element::f32, {}, std::vector<float>{weight_scale_value});
+            auto weight_dequantized = std::make_shared<ov::op::v1::Multiply>(weight_convert, weight_scale);
+
+            auto conv = std::make_shared<ov::op::v1::Convolution>(input_dequantized,
+                                                                  weight_dequantized,
+                                                                  ov::Strides{1, 1},
+                                                                  ov::CoordinateDiff{1, 1},
+                                                                  ov::CoordinateDiff{1, 1},
+                                                                  ov::Strides{1, 1});
+
+            ov::test::utils::InputGenerateData bias_gen_data(-2.0, 4, 100, seed++);
+            auto bias_const = ov::test::utils::make_constant(ov::element::f32, ov::Shape{1, 32, 1, 1}, bias_gen_data);
+            auto conv_biased = std::make_shared<ov::op::v1::Add>(conv, bias_const);
+
+            const auto& qp_2 = q_params.second;
+            auto fake_quantize = qp_2.build_fq(conv_biased);
+            auto act_quantized = std::make_shared<ov::op::v0::Convert>(fake_quantize, quantization_precision);
+            auto act_convert = std::make_shared<ov::op::v0::Convert>(act_quantized, ov::element::f32);
+            return qp_2.build_dq(act_convert, quantization_precision);
+        };
+
+        auto left_branch = create_qdq_branch(1e-3f);
+        auto right_branch = create_qdq_branch(1e-4f);
+        auto add_branches = std::make_shared<ov::op::v1::Add>(left_branch, right_branch);
+
+        auto model = std::make_shared<ov::Model>(ov::OutputVector{add_branches}, params, "QDQStripping");
+        return model;
+    }
+
+    void SetUp() override {
+        targetDevice = ov::test::utils::DEVICE_GPU;
+        const auto& [input_shape, input_precision, quantization_precision] = GetParam();
+        init_input_shapes({input_shape});
+        inType = outType = input_precision;
+
+        // Since the FQ are not executed in a strictly 'fair' manner, and just replaced with clamp ops, a small accuracy deviation is expected.
+        abs_threshold = 1e-3f;
+        function = init_subgraph(input_shape.first, quantization_precision);
+    }
+
+    void validate() override {
+        ov::test::SubgraphBaseTest::validate();
+        auto runtime_model = compiledModel.get_runtime_model();
+        ASSERT_TRUE(runtime_model != nullptr) << "Runtime model should not be null";
+        size_t quantize_count = 0;
+        for (const auto& op : runtime_model->get_ordered_ops()) {
+            auto layer_type = op->get_rt_info().at(ov::exec_model_info::LAYER_TYPE).as<std::string>();
+            if (layer_type == std::string("Quantize")) {
+                quantize_count++;
+            }
+        }
+        const size_t expected_quantize_count = 0;
+        ASSERT_EQ(quantize_count, expected_quantize_count) << "Unexpected Quantize node count.";
+    }
+};
+
+TEST_P(QDQStrippingTest, Inference) {
+    run();
+}
+
+const std::vector<InputShape> input_shapes = {{{-1, -1, -1, -1}, {{1, 3, 128, 128}}}};
+const std::vector<ov::element::Type> input_precisions = {ov::element::f32};
+const std::vector<ov::element::Type> quantization_precisions = {ov::element::u16, ov::element::i16};
+
+INSTANTIATE_TEST_SUITE_P(smoke_QDQStripping,
+                         QDQStrippingTest,
+                         ::testing::Combine(::testing::ValuesIn(input_shapes),
+                                            ::testing::ValuesIn(input_precisions),
+                                            ::testing::ValuesIn(quantization_precisions)),
+                         QDQStrippingTest::getTestCaseName);
+} // namespace
\ No newline at end of file