From 29e61db33a4a7855c252eda3f35d921a8e863637 Mon Sep 17 00:00:00 2001 From: minghong Date: Fri, 20 Sep 2024 15:47:40 +0800 Subject: [PATCH] [opt](nereids) set lower bound for range-selectivity #40089 --- .../org/apache/doris/nereids/stats/FilterEstimation.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index 6f6e768caae1f6..faa9fd323d68f9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -69,6 +69,9 @@ */ public class FilterEstimation extends ExpressionVisitor { public static final double DEFAULT_INEQUALITY_COEFFICIENT = 0.5; + // "Range selectivity is prone to producing outliers, so we add this threshold limit. + // The threshold estimation is calculated based on selecting one month out of fifty years." + public static final double RANGE_SELECTIVITY_THRESHOLD = 0.0016; public static final double DEFAULT_IN_COEFFICIENT = 1.0 / 3.0; public static final double DEFAULT_HAVING_COEFFICIENT = 0.01; @@ -602,6 +605,8 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, DataType double sel = leftRange.overlapPercentWith(rightRange); if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel != 1.0)) { sel = DEFAULT_INEQUALITY_COEFFICIENT; + } else if (sel < RANGE_SELECTIVITY_THRESHOLD) { + sel = RANGE_SELECTIVITY_THRESHOLD; } sel = getNotNullSelectivity(leftStats, sel); updatedStatistics = context.statistics.withSel(sel);