Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions fe/src/main/java/org/apache/impala/analysis/Analyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -2128,13 +2128,20 @@ public void registerConjuncts(Expr e, boolean fromHavingClause)
* evaluates the conjunct. If the conjunct evaluates to false, marks this query
* block as having an empty result set or as having an empty select-project-join
* portion, if fromHavingClause is true or false, respectively.
* No-op if the conjunct is not constant or is outer joined.
* No-op if the conjunct is not constant, is outer joined, or is anti-joined.
* For anti-joins, a constant FALSE in the ON clause means no rows can match, so
* all rows from the left table should be returned (for LEFT ANTI JOIN) or all rows
* from the right table should be returned (for RIGHT ANTI JOIN). We should not
* mark the entire query as having an empty result set.
* Returns true if the conjunct is constant FALSE.
* Throws an AnalysisException if there is an error evaluating `conjunct`.
*/
private boolean markConstantConjunct(Expr conjunct, boolean fromHavingClause)
throws AnalysisException {
if (!conjunct.isConstant() || isOjConjunct(conjunct)) return false;
if (!conjunct.isConstant() || isOjConjunct(conjunct)
|| isAntiJoinedConjunct(conjunct)) {
return false;
}
markConjunctAssigned(conjunct);
if ((!fromHavingClause && !hasEmptySpjResultSet_)
|| (fromHavingClause && !hasEmptyResultSet_)) {
Expand Down Expand Up @@ -2495,13 +2502,15 @@ public boolean canEvalOuterJoinedConjunct(Expr e, List<TupleId> tids) {
*/
public boolean canEvalPredicate(List<TupleId> tupleIds, Expr e) {
if (!e.isBoundByTupleIds(tupleIds)) return false;
List<TupleId> tids = new ArrayList<>();
e.getIds(tids, null);
if (tids.isEmpty()) return true;

// Check On-clause conjuncts before the tids.isEmpty() shortcut so that constant
// On-clause predicates (e.g. ON false) are routed to canEvalOnClauseConjunct()
// instead of being incorrectly assigned to a scan node.
if (e.isOnClauseConjunct()) {
return canEvalOnClauseConjunct(tupleIds, e);
}
List<TupleId> tids = new ArrayList<>();
e.getIds(tids, null);
if (tids.isEmpty()) return true;
return isLastOjMaterializedByTupleIds(tupleIds, e);
}

Expand Down Expand Up @@ -2587,6 +2596,11 @@ public boolean canEvalAntiJoinedConjunct(Expr e, List<TupleId> nodeTupleIds) {
if (antiJoinRef == null) return true;
List<TupleId> tids = new ArrayList<>();
e.getIds(tids, null);
if (tids.isEmpty()) {
// Constant anti-join On-clause conjuncts (e.g. ON false) must be evaluated
// at the anti-join node, not at a scan node below it.
return nodeTupleIds.containsAll(antiJoinRef.getAllTableRefIds());
}
if (tids.size() > 1) {
return nodeTupleIds.containsAll(antiJoinRef.getAllTableRefIds())
&& antiJoinRef.getAllTableRefIds().containsAll(nodeTupleIds);
Expand Down
13 changes: 11 additions & 2 deletions fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -1317,6 +1317,15 @@ private void getConjunctsToInlineView(final Analyzer analyzer, final String alia
List<TupleId> tids = new ArrayList<>();
e.getIds(tids, null);
if (tids.isEmpty()) {
// Do not migrate anti-join conjuncts (e.g., ON FALSE) into the inline view.
// For anti-joins, a constant FALSE in the ON clause means no rows can match,
// so the correct behavior depends on the join type (LEFT ANTI returns all left
// rows, RIGHT ANTI returns all right rows).
// Migrating such conjuncts would incorrectly mark the inline view as having an
// empty result set.
if (analyzer.isAntiJoinedConjunct(e)) {
continue;
}
evalInInlineViewPreds.add(e);
} else if (e.isOnClauseConjunct()) {
if (!analyzer.canEvalOnClauseConjunct(tupleIds, e)) continue;
Expand Down Expand Up @@ -2083,8 +2092,8 @@ private PlanNode createJoinNode(PlanNode outer, PlanNode inner,
// Unassigned conjuncts bound by the invisible tuple id of a semi join must have
// come from the join's On-clause, and therefore, must be added to the other join
// conjuncts to produce correct results.
// TODO This doesn't handle predicates specified in the On clause which are not
// bound by any tuple id (e.g. ON (true))
// Note: Constant predicates in the On clause (e.g. ON TRUE, ON FALSE) are now
// handled correctly through canEvalPredicate() and canEvalAntiJoinedConjunct().
List<TupleId> tblRefIds = Lists.newArrayList(outer.getTblRefIds());
tblRefIds.addAll(inner.getTblRefIds());
otherJoinConjuncts = analyzer.getUnassignedConjuncts(tblRefIds, false);
Expand Down
187 changes: 187 additions & 0 deletions testdata/workloads/functional-query/queries/QueryTest/semi-joins.test
Original file line number Diff line number Diff line change
Expand Up @@ -346,3 +346,190 @@ RIGHT ANTI JOIN SemiJoinTblB b on v.b = b.b
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT ANTI JOIN with ON FALSE should return all rows from left table
# because no rows from right table can match.
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON false
---- RESULTS
1,1,1
1,1,10
1,2,10
1,3,10
NULL,NULL,30
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing RIGHT ANTI JOIN with ON FALSE should return all rows from right table
# because no rows from left table can match.
SELECT b.* FROM SemiJoinTblA a RIGHT ANTI JOIN SemiJoinTblB b ON false
---- RESULTS
1,1,1
1,1,10
1,2,5
1,NULL,10
2,10,NULL
3,NULL,NULL
3,NULL,50
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT ANTI JOIN with ON TRUE should return empty result set
# because all rows from right table match.
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON true
---- RESULTS
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing RIGHT ANTI JOIN with ON TRUE should return empty result set
# because all rows from left table match.
SELECT b.* FROM SemiJoinTblA a RIGHT ANTI JOIN SemiJoinTblB b ON true
---- RESULTS
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT ANTI JOIN with constant expression 1=0 (equivalent to FALSE)
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON 1=0
---- RESULTS
1,1,1
1,1,10
1,2,10
1,3,10
NULL,NULL,30
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT ANTI JOIN with constant expression 1=1 (equivalent to TRUE)
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON 1=1
---- RESULTS
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT ANTI JOIN with ON FALSE and WHERE clause
# Should return rows from left table that satisfy WHERE condition
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON false
WHERE a.a = 1
---- RESULTS
1,1,1
1,1,10
1,2,10
1,3,10
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT ANTI JOIN with ON TRUE and WHERE clause
# Should return empty result (anti-join filters all, then WHERE is applied)
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON true
WHERE a.a = 1
---- RESULTS
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT SEMI JOIN with ON FALSE should return empty result
# because no rows from right table can match.
SELECT a.* FROM SemiJoinTblA a LEFT SEMI JOIN SemiJoinTblB b ON false
---- RESULTS
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT SEMI JOIN with ON TRUE should return all rows from left table
SELECT a.* FROM SemiJoinTblA a LEFT SEMI JOIN SemiJoinTblB b ON true
---- RESULTS
1,1,1
1,1,10
1,2,10
1,3,10
NULL,NULL,30
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing nested query with LEFT ANTI JOIN ON FALSE
SELECT * FROM (
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON false
) v WHERE v.a = 2
---- RESULTS
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing multiple anti-joins with constant predicates
SELECT a.* FROM SemiJoinTblA a
LEFT ANTI JOIN SemiJoinTblB b ON false
LEFT ANTI JOIN SemiJoinTblB c ON true
---- RESULTS
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing anti-join with mixed constant and regular predicates (ON FALSE OR a.a = b.a)
# The OR with FALSE should be optimized away, leaving just a.a = b.a
# Only rows with NULL in column a are returned because NULL doesn't match anything
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON (false OR a.a = b.a)
---- RESULTS
NULL,NULL,30
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing anti-join with mixed constant and regular predicates (ON TRUE AND a.a = b.a)
# The AND with TRUE should be optimized away, leaving just a.a = b.a
# Only rows with NULL in column a are returned because NULL doesn't match anything
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON (true AND a.a = b.a)
---- RESULTS
NULL,NULL,30
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing anti-join with inline view on the left side and ON FALSE
# The inline view should not be marked as having an empty result set
SELECT v.* FROM (SELECT * FROM SemiJoinTblA WHERE a = 1) v
LEFT ANTI JOIN SemiJoinTblB b ON false
---- RESULTS
1,1,1
1,1,10
1,2,10
1,3,10
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing nested inline views with anti-join and ON FALSE
# Verifies that constant predicates don't propagate incorrectly through nested views
SELECT * FROM (
SELECT v.* FROM (SELECT * FROM SemiJoinTblA WHERE a = 2) v
LEFT ANTI JOIN SemiJoinTblB b ON false
) v2 WHERE v2.a = 2
---- RESULTS
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing anti-join with inline view and mixed predicates
# Constant FALSE should not be migrated into the inline view
SELECT v.* FROM (SELECT * FROM SemiJoinTblA WHERE a = 1) v
LEFT ANTI JOIN SemiJoinTblB b ON (false OR v.a = b.a)
---- RESULTS
---- TYPES
INT, INT, INT
====