Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(nereids) fix bug for A>n, where A.max is infinity #39936 (2.0) #41094

Merged
merged 2 commits into from
Sep 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -602,11 +602,13 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, DataType
.setMaxExpr(intersectRange.getHighExpr())
.setNdv(intersectRange.getDistinctValues())
.setNumNulls(0);
double sel = leftRange.overlapPercentWith(rightRange);
double sel = leftRange.getDistinctValues() == 0
? 1.0
: intersectRange.getDistinctValues() / leftRange.getDistinctValues();
if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel != 1.0)) {
sel = DEFAULT_INEQUALITY_COEFFICIENT;
} else if (sel < RANGE_SELECTIVITY_THRESHOLD) {
sel = RANGE_SELECTIVITY_THRESHOLD;
} else {
sel = Math.max(sel, RANGE_SELECTIVITY_THRESHOLD);
}
sel = getNotNullSelectivity(leftStats, sel);
updatedStatistics = context.statistics.withSel(sel);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ public boolean isInfinite() {
return Double.isInfinite(low) || Double.isInfinite(high);
}

/**
 * Tells whether this range is infinite without being infinite on both sides.
 *
 * @return {@code true} when {@code isInfinite()} holds and {@code isBothInfinite()} does not
 */
public boolean isOneSideInfinite() {
    if (!isInfinite()) {
        // No infinite bound at all, so there is nothing one-sided about it.
        return false;
    }
    return !isBothInfinite();
}

/**
 * Tells whether both bounds of this range are finite numbers.
 *
 * @return {@code true} only when {@code Double.isFinite} accepts both {@code low} and {@code high}
 */
public boolean isFinite() {
    if (!Double.isFinite(low)) {
        return false;
    }
    return Double.isFinite(high);
}
Expand Down Expand Up @@ -175,22 +179,29 @@ public Pair<Double, LiteralExpr> maxPair(double r1, LiteralExpr e1, double r2, L
}

/**
 * Builds the range shared by this range and {@code other}.
 *
 * <p>The new lower bound (value plus literal expression) comes from {@code maxPair} over the two
 * lows and the new upper bound from {@code minPair} over the two highs. When the resulting
 * bounds cross ({@code newLow > newHigh}) the result is {@code empty(dataType)}.
 *
 * <p>NOTE(review): this listing appears to show the old and the new lines of a diff together,
 * without +/- markers. As written, the early {@code return new StatisticRange(...)} inside the
 * {@code if} and the {@code return empty(dataType)} near the end make the statements after them
 * unreachable; they look like the pre-change lines. Confirm against the merged file.
 *
 * @param other the range to intersect with this one
 * @return the covered range, or the empty range when the bounds do not overlap
 */
public StatisticRange cover(StatisticRange other) {
// double newLow = Math.max(low, other.low);
// double newHigh = Math.min(high, other.high);
StatisticRange resultRange;
// presumably maxPair picks the larger low together with its expression; verify against maxPair
Pair<Double, LiteralExpr> biggerLow = maxPair(low, lowExpr, other.low, other.lowExpr);
double newLow = biggerLow.first;
LiteralExpr newLowExpr = biggerLow.second;
// presumably minPair picks the smaller high together with its expression; verify against minPair
Pair<Double, LiteralExpr> smallerHigh = minPair(high, highExpr, other.high, other.highExpr);
double newHigh = smallerHigh.first;
LiteralExpr newHighExpr = smallerHigh.second;

if (newLow <= newHigh) {
// Scale this range's distinct-value count by the overlap fraction, then combine it with the
// unscaled count through minExcludeNaN.
double overlapPercentOfLeft = overlapPercentWith(other);
double overlapDistinctValuesLeft = overlapPercentOfLeft * distinctValues;
double coveredDistinctValues = minExcludeNaN(distinctValues, overlapDistinctValuesLeft);
// NOTE(review): this return precedes the if/else below and would make it unreachable;
// looks like the pre-change line of the diff.
return new StatisticRange(newLow, newLowExpr, newHigh, newHighExpr, coveredDistinctValues, dataType);
// When this range reports isBothInfinite() and other is one-side infinite, the distinct-value
// count is distinctValues times a fixed heuristic factor rather than coveredDistinctValues.
if (this.isBothInfinite() && other.isOneSideInfinite()) {
resultRange = new StatisticRange(newLow, newLowExpr, newHigh, newHighExpr,
distinctValues * INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR,
dataType);
} else {
resultRange = new StatisticRange(newLow, newLowExpr, newHigh, newHighExpr, coveredDistinctValues,
dataType);
}
} else {
// Bounds crossed: nothing is covered.
resultRange = empty(dataType);
}
// NOTE(review): two consecutive returns; the first one looks like the pre-change line of the diff.
return empty(dataType);
return resultRange;
}

public StatisticRange union(StatisticRange other) {
Expand Down Expand Up @@ -241,6 +252,6 @@ public double getDistinctValues() {

/**
 * Renders this range as text built from {@code lowExpr} and {@code highExpr}.
 *
 * <p>NOTE(review): two consecutive {@code return} statements are listed. The second one, which
 * also prints {@code distinctValues}, is unreachable as written; this looks like the old and the
 * new line of a diff shown together. Confirm against the merged file.
 */
@Override
public String toString() {
return "(" + lowExpr + "," + highExpr + ")";
return "range=(" + lowExpr + "," + highExpr + "), ndv=" + distinctValues;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1292,4 +1292,40 @@ public void testStringRangeColToCol() {
Statistics agrtc = new FilterEstimation().estimate(new GreaterThan(a, c), baseStats);
Assertions.assertEquals(50, agrtc.getRowCount());
}

/**
 * Estimates {@code a >= '2024-05-14'} and {@code b > 0} separately and then as a conjunction,
 * over 1000 input rows, and checks the estimated row counts (500, 500 and 250).
 *
 * <p>Neither column statistic built here sets a min or max value; presumably that leaves the
 * column ranges unbounded, which is what the test name refers to. Confirm against
 * ColumnStatisticBuilder defaults.
 */
@Test
void testAndWithInfinity() {
    Double rowCount = 1000.0;

    // Varchar column "a": 10 distinct values, no nulls.
    SlotReference a = new SlotReference("a", new VarcharType(25));
    ColumnStatisticBuilder varcharColumnStats = new ColumnStatisticBuilder()
            .setNdv(10)
            .setAvgSizeByte(4)
            .setNumNulls(0)
            .setCount(rowCount);

    // Integer column "b": 488 distinct values, no nulls.
    SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
    ColumnStatisticBuilder intColumnStats = new ColumnStatisticBuilder()
            .setNdv(488)
            .setAvgSizeByte(25)
            .setNumNulls(0)
            .setCount(rowCount);

    StatisticsBuilder inputStats = new StatisticsBuilder();
    inputStats.setRowCount(rowCount);
    inputStats.putColumnStatistics(a, varcharColumnStats.build());
    inputStats.putColumnStatistics(b, intColumnStats.build());

    // Each single-column comparison on its own is expected to keep half of the rows.
    Expression strGE = new GreaterThanEqual(a,
            new org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-05-14"));
    Statistics strOnly = new FilterEstimation().estimate(strGE, inputStats.build());
    Assertions.assertEquals(500, strOnly.getRowCount());

    Expression intGE = new GreaterThan(b, new IntegerLiteral(0));
    Statistics intOnly = new FilterEstimation().estimate(intGE, inputStats.build());
    Assertions.assertEquals(500, intOnly.getRowCount());

    // The conjunction of both is expected to keep a quarter of the rows.
    Expression conjunction = new And(strGE, intGE);
    Statistics combined = new FilterEstimation().estimate(conjunction, inputStats.build());
    Assertions.assertEquals(250, combined.getRowCount());
}

}
33 changes: 17 additions & 16 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,21 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------PhysicalProject
------------------PhysicalOlapScan[customer]
--------------PhysicalDistribute
----------------hashJoin[INNER_JOIN](ctr1.ctr_store_sk = ctr2.ctr_store_sk)(cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN](store.s_store_sk = ctr1.ctr_store_sk)
----------------------PhysicalDistribute
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------filter((store.s_state = 'SD'))
----------------------------PhysicalOlapScan[store]
------------------PhysicalDistribute
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute
------------------------hashAgg[LOCAL]
--------------------------PhysicalDistribute
----------------------------PhysicalProject
------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------PhysicalProject
------------------hashJoin[INNER_JOIN](ctr1.ctr_store_sk = ctr2.ctr_store_sk)(cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN](store.s_store_sk = ctr1.ctr_store_sk)
------------------------PhysicalDistribute
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------------PhysicalDistribute
--------------------------PhysicalProject
----------------------------filter((store.s_state = 'SD'))
------------------------------PhysicalOlapScan[store]
--------------------PhysicalDistribute
----------------------hashAgg[GLOBAL]
------------------------PhysicalDistribute
--------------------------hashAgg[LOCAL]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )

105 changes: 51 additions & 54 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query14.out
Original file line number Diff line number Diff line change
Expand Up @@ -95,78 +95,75 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------------PhysicalUnion
------------------------PhysicalProject
--------------------------NestedLoopJoin[INNER_JOIN](cast(sales as DOUBLE) > cast(average_sales as DOUBLE))
----------------------------PhysicalProject
------------------------------hashAgg[GLOBAL]
--------------------------------PhysicalDistribute
----------------------------------hashAgg[LOCAL]
------------------------------------PhysicalProject
--------------------------------------hashJoin[RIGHT_SEMI_JOIN](store_sales.ss_item_sk = cross_items.ss_item_sk)
----------------------------hashAgg[GLOBAL]
------------------------------PhysicalDistribute
--------------------------------hashAgg[LOCAL]
----------------------------------PhysicalProject
------------------------------------hashJoin[RIGHT_SEMI_JOIN](store_sales.ss_item_sk = cross_items.ss_item_sk)
--------------------------------------PhysicalDistribute
----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = item.i_item_sk)
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = item.i_item_sk)
------------------------------------------PhysicalDistribute
--------------------------------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
----------------------------------------------PhysicalProject
------------------------------------------------PhysicalOlapScan[store_sales]
----------------------------------------------PhysicalDistribute
------------------------------------------------PhysicalProject
--------------------------------------------------filter((date_dim.d_year = 2002)(date_dim.d_moy = 11))
----------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------PhysicalDistribute
------------------------------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[item]
----------------------------------------------PhysicalOlapScan[store_sales]
--------------------------------------------PhysicalDistribute
----------------------------------------------PhysicalProject
------------------------------------------------filter((date_dim.d_year = 2002)(date_dim.d_moy = 11))
--------------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalProject
--------------------------------------------PhysicalOlapScan[item]
----------------------------PhysicalDistribute
------------------------------PhysicalAssertNumRows
--------------------------------PhysicalDistribute
----------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
------------------------PhysicalProject
--------------------------NestedLoopJoin[INNER_JOIN](cast(sales as DOUBLE) > cast(average_sales as DOUBLE))
----------------------------PhysicalProject
------------------------------hashAgg[GLOBAL]
--------------------------------PhysicalDistribute
----------------------------------hashAgg[LOCAL]
------------------------------------PhysicalProject
--------------------------------------hashJoin[RIGHT_SEMI_JOIN](catalog_sales.cs_item_sk = cross_items.ss_item_sk)
----------------------------hashAgg[GLOBAL]
------------------------------PhysicalDistribute
--------------------------------hashAgg[LOCAL]
----------------------------------PhysicalProject
------------------------------------hashJoin[RIGHT_SEMI_JOIN](catalog_sales.cs_item_sk = cross_items.ss_item_sk)
--------------------------------------PhysicalDistribute
----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk)
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk)
------------------------------------------PhysicalDistribute
--------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
----------------------------------------------PhysicalProject
------------------------------------------------PhysicalOlapScan[catalog_sales]
----------------------------------------------PhysicalDistribute
------------------------------------------------PhysicalProject
--------------------------------------------------filter((date_dim.d_moy = 11)(date_dim.d_year = 2002))
----------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------PhysicalDistribute
------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[item]
----------------------------------------------PhysicalOlapScan[catalog_sales]
--------------------------------------------PhysicalDistribute
----------------------------------------------PhysicalProject
------------------------------------------------filter((date_dim.d_moy = 11)(date_dim.d_year = 2002))
--------------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalProject
--------------------------------------------PhysicalOlapScan[item]
----------------------------PhysicalDistribute
------------------------------PhysicalAssertNumRows
--------------------------------PhysicalDistribute
----------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
------------------------PhysicalProject
--------------------------NestedLoopJoin[INNER_JOIN](cast(sales as DOUBLE) > cast(average_sales as DOUBLE))
----------------------------PhysicalProject
------------------------------hashAgg[GLOBAL]
--------------------------------PhysicalDistribute
----------------------------------hashAgg[LOCAL]
------------------------------------PhysicalProject
--------------------------------------hashJoin[RIGHT_SEMI_JOIN](web_sales.ws_item_sk = cross_items.ss_item_sk)
----------------------------hashAgg[GLOBAL]
------------------------------PhysicalDistribute
--------------------------------hashAgg[LOCAL]
----------------------------------PhysicalProject
------------------------------------hashJoin[RIGHT_SEMI_JOIN](web_sales.ws_item_sk = cross_items.ss_item_sk)
--------------------------------------PhysicalDistribute
----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------------------------hashJoin[INNER_JOIN](web_sales.ws_item_sk = item.i_item_sk)
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------------------------hashJoin[INNER_JOIN](web_sales.ws_item_sk = item.i_item_sk)
------------------------------------------PhysicalDistribute
--------------------------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
----------------------------------------------PhysicalProject
------------------------------------------------PhysicalOlapScan[web_sales]
----------------------------------------------PhysicalDistribute
------------------------------------------------PhysicalProject
--------------------------------------------------filter((date_dim.d_year = 2002)(date_dim.d_moy = 11))
----------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------PhysicalDistribute
------------------------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[item]
----------------------------------------------PhysicalOlapScan[web_sales]
--------------------------------------------PhysicalDistribute
----------------------------------------------PhysicalProject
------------------------------------------------filter((date_dim.d_year = 2002)(date_dim.d_moy = 11))
--------------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalProject
--------------------------------------------PhysicalOlapScan[item]
----------------------------PhysicalDistribute
------------------------------PhysicalAssertNumRows
--------------------------------PhysicalDistribute
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@ PhysicalResultSink
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)((substring(ca_zip, 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') OR ca_state IN ('CA', 'WA', 'GA')) OR (catalog_sales.cs_sales_price > 500.00))
------------------PhysicalDistribute
--------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
----------------------PhysicalProject
------------------------PhysicalOlapScan[catalog_sales]
----------------------PhysicalDistribute
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
------------------------PhysicalProject
--------------------------filter((date_dim.d_qoy = 1)(date_dim.d_year = 2001))
----------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalOlapScan[catalog_sales]
------------------------PhysicalDistribute
--------------------------PhysicalProject
----------------------------filter((date_dim.d_qoy = 1)(date_dim.d_year = 2001))
------------------------------PhysicalOlapScan[date_dim]
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN](customer.c_current_addr_sk = customer_address.ca_address_sk)
Expand Down
Loading
Loading