Skip to content

Commit

Permalink
[opt](nereids) set lower bound for range-selectivity(2.1) (#41061)
Browse files Browse the repository at this point in the history
## Proposed changes
Cherry-pick of #40089.
Issue Number: close #xxx

<!--Describe your changes.-->
  • Loading branch information
englefly committed Sep 21, 2024
1 parent d1d52ae commit 9dc55f9
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@
*/
public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationContext> {
public static final double DEFAULT_INEQUALITY_COEFFICIENT = 0.5;
// Range selectivity is prone to producing outliers, so we clamp it to this lower bound.
// The bound approximates selecting one month out of fifty years: 1 / (50 * 12) = 0.00167, truncated to 0.0016.
public static final double RANGE_SELECTIVITY_THRESHOLD = 0.0016;
public static final double DEFAULT_IN_COEFFICIENT = 1.0 / 3.0;

public static final double DEFAULT_HAVING_COEFFICIENT = 0.01;
Expand Down Expand Up @@ -627,6 +630,8 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, DataType
: intersectRange.getDistinctValues() / leftRange.getDistinctValues();
if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel != 1.0)) {
sel = DEFAULT_INEQUALITY_COEFFICIENT;
} else if (sel < RANGE_SELECTIVITY_THRESHOLD) {
sel = RANGE_SELECTIVITY_THRESHOLD;
}
sel = getNotNullSelectivity(leftStats, sel);
updatedStatistics = context.statistics.withSel(sel);
Expand Down
17 changes: 8 additions & 9 deletions regression-test/data/nereids_hint_tpcds_p0/shape/query12.out
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@ PhysicalResultSink
--------------------hashAgg[LOCAL]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ws_item_sk]
--------------------------PhysicalDistribute[DistributionSpecHash]
----------------------------PhysicalProject
------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
--------------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------------PhysicalProject
------------------------------------filter((date_dim.d_date <= '2001-07-15') and (date_dim.d_date >= '2001-06-15'))
--------------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalDistribute[DistributionSpecHash]
----------------------------------filter((date_dim.d_date <= '2001-07-15') and (date_dim.d_date >= '2001-06-15'))
------------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------PhysicalProject
------------------------------filter(i_category IN ('Books', 'Electronics', 'Men'))
--------------------------------PhysicalOlapScan[item]
Expand Down
49 changes: 23 additions & 26 deletions regression-test/data/nereids_hint_tpcds_p0/shape/query80.out
Original file line number Diff line number Diff line change
Expand Up @@ -16,34 +16,31 @@ PhysicalResultSink
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
------------------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF4 ss_item_sk->[sr_item_sk];RF5 ss_ticket_number->[sr_ticket_number]
--------------------------------PhysicalDistribute[DistributionSpecHash]
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF4 RF5
--------------------------------PhysicalDistribute[DistributionSpecHash]
----------------------------------PhysicalProject
------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk]
--------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk]
----------------------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------------------PhysicalProject
--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk]
----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
------------------------------------------------PhysicalProject
--------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3
------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------------------------PhysicalProject
----------------------------------------------------filter((date_dim.d_date <= '2002-09-13') and (date_dim.d_date >= '2002-08-14'))
------------------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
------------------------------------------------PhysicalProject
--------------------------------------------------filter((promotion.p_channel_tv = 'N'))
----------------------------------------------------PhysicalOlapScan[promotion]
----------------------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------------------PhysicalProject
--------------------------------------------filter((item.i_current_price > 50.00))
----------------------------------------------PhysicalOlapScan[item]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[store_returns] apply RFs: RF4 RF5
--------------------------------PhysicalProject
----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk]
------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk]
--------------------------------------PhysicalProject
----------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk]
------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3
--------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------------------------PhysicalProject
------------------------------------------------filter((date_dim.d_date <= '2002-09-13') and (date_dim.d_date >= '2002-08-14'))
--------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------------------PhysicalProject
----------------------------------------------filter((promotion.p_channel_tv = 'N'))
------------------------------------------------PhysicalOlapScan[promotion]
--------------------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------------------PhysicalProject
------------------------------------------PhysicalOlapScan[store]
------------------------------------------filter((item.i_current_price > 50.00))
--------------------------------------------PhysicalOlapScan[item]
------------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store]
--------------------PhysicalProject
----------------------hashAgg[GLOBAL]
------------------------PhysicalDistribute[DistributionSpecHash]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,14 @@ PhysicalResultSink
--------------------hashAgg[LOCAL]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ws_item_sk]
--------------------------PhysicalDistribute[DistributionSpecHash]
----------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
--------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
----------------------------PhysicalDistribute[DistributionSpecReplicated]
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------PhysicalProject
----------------------------------filter((date_dim.d_date <= '2001-07-15') and (date_dim.d_date >= '2001-06-15'))
------------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------------filter((date_dim.d_date <= '2001-07-15') and (date_dim.d_date >= '2001-06-15'))
----------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------PhysicalProject
------------------------------filter(i_category IN ('Books', 'Electronics', 'Men'))
--------------------------------PhysicalOlapScan[item]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,34 +16,31 @@ PhysicalResultSink
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
------------------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF4 ss_item_sk->[sr_item_sk];RF5 ss_ticket_number->[sr_ticket_number]
--------------------------------PhysicalDistribute[DistributionSpecHash]
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF4 RF5
--------------------------------PhysicalDistribute[DistributionSpecHash]
----------------------------------PhysicalProject
------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk]
--------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk]
----------------------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------------------PhysicalProject
--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk]
----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
------------------------------------------------PhysicalProject
--------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3
------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------------------------PhysicalProject
----------------------------------------------------filter((date_dim.d_date <= '2002-09-13') and (date_dim.d_date >= '2002-08-14'))
------------------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
------------------------------------------------PhysicalProject
--------------------------------------------------filter((promotion.p_channel_tv = 'N'))
----------------------------------------------------PhysicalOlapScan[promotion]
----------------------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------------------PhysicalProject
--------------------------------------------filter((item.i_current_price > 50.00))
----------------------------------------------PhysicalOlapScan[item]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[store_returns] apply RFs: RF4 RF5
--------------------------------PhysicalProject
----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk]
------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk]
--------------------------------------PhysicalProject
----------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk]
------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3
--------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------------------------PhysicalProject
------------------------------------------------filter((date_dim.d_date <= '2002-09-13') and (date_dim.d_date >= '2002-08-14'))
--------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------------------PhysicalProject
----------------------------------------------filter((promotion.p_channel_tv = 'N'))
------------------------------------------------PhysicalOlapScan[promotion]
--------------------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------------------PhysicalProject
------------------------------------------PhysicalOlapScan[store]
------------------------------------------filter((item.i_current_price > 50.00))
--------------------------------------------PhysicalOlapScan[item]
------------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store]
--------------------PhysicalProject
----------------------hashAgg[GLOBAL]
------------------------PhysicalDistribute[DistributionSpecHash]
Expand Down

0 comments on commit 9dc55f9

Please sign in to comment.