diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index 4109ea1740..03b13baddf 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -2128,13 +2128,20 @@ public void registerConjuncts(Expr e, boolean fromHavingClause) * evaluates the conjunct. If the conjunct evaluates to false, marks this query * block as having an empty result set or as having an empty select-project-join * portion, if fromHavingClause is true or false, respectively. - * No-op if the conjunct is not constant or is outer joined. + * No-op if the conjunct is not constant, is outer joined, or is anti-joined. + * For anti-joins, a constant FALSE in the ON clause means no rows from the right + * table will match, so all rows from the left table should be returned (for LEFT + * ANTI JOIN) or all rows from the right table (for RIGHT ANTI JOIN). We should not + * mark the entire query as having an empty result set. * Return true, if conjunct is constant FALSE. * Throws an AnalysisException if there is an error evaluating `conjunct` */ private boolean markConstantConjunct(Expr conjunct, boolean fromHavingClause) throws AnalysisException { - if (!conjunct.isConstant() || isOjConjunct(conjunct)) return false; + if (!conjunct.isConstant() || isOjConjunct(conjunct) + || isAntiJoinedConjunct(conjunct)) { + return false; + } markConjunctAssigned(conjunct); if ((!fromHavingClause && !hasEmptySpjResultSet_) || (fromHavingClause && !hasEmptyResultSet_)) { @@ -2495,13 +2502,15 @@ public boolean canEvalOuterJoinedConjunct(Expr e, List tids) { */ public boolean canEvalPredicate(List tupleIds, Expr e) { if (!e.isBoundByTupleIds(tupleIds)) return false; - List tids = new ArrayList<>(); - e.getIds(tids, null); - if (tids.isEmpty()) return true; - + // Check On-clause conjuncts before the tids.isEmpty() shortcut so that constant + // On-clause predicates (e.g. 
ON false) are routed to canEvalOnClauseConjunct() + // instead of being incorrectly assigned to a scan node. if (e.isOnClauseConjunct()) { return canEvalOnClauseConjunct(tupleIds, e); } + List tids = new ArrayList<>(); + e.getIds(tids, null); + if (tids.isEmpty()) return true; return isLastOjMaterializedByTupleIds(tupleIds, e); } @@ -2587,6 +2596,11 @@ public boolean canEvalAntiJoinedConjunct(Expr e, List nodeTupleIds) { if (antiJoinRef == null) return true; List tids = new ArrayList<>(); e.getIds(tids, null); + if (tids.isEmpty()) { + // Constant anti-join On-clause conjuncts (e.g. ON false) must be evaluated + // at the anti-join node, not at a scan node below it. + return nodeTupleIds.containsAll(antiJoinRef.getAllTableRefIds()); + } if (tids.size() > 1) { return nodeTupleIds.containsAll(antiJoinRef.getAllTableRefIds()) && antiJoinRef.getAllTableRefIds().containsAll(nodeTupleIds); diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java index bf70f0b377..9a4232cf92 100644 --- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java @@ -1317,6 +1317,15 @@ private void getConjunctsToInlineView(final Analyzer analyzer, final String alia List tids = new ArrayList<>(); e.getIds(tids, null); if (tids.isEmpty()) { + // Do not migrate anti-join conjuncts (e.g., ON FALSE) into the inline view. + // For anti-joins, a constant FALSE in the ON clause means no rows from the + // right table will match, so the correct behavior depends on the join + // type (LEFT ANTI returns all left rows, RIGHT ANTI returns all right rows). + // Migrating such conjuncts would incorrectly mark the inline view as having an + // empty result set. 
+ if (analyzer.isAntiJoinedConjunct(e)) { + continue; + } evalInInlineViewPreds.add(e); } else if (e.isOnClauseConjunct()) { if (!analyzer.canEvalOnClauseConjunct(tupleIds, e)) continue; @@ -2083,8 +2092,8 @@ private PlanNode createJoinNode(PlanNode outer, PlanNode inner, // Unassigned conjuncts bound by the invisible tuple id of a semi join must have // come from the join's On-clause, and therefore, must be added to the other join // conjuncts to produce correct results. - // TODO This doesn't handle predicates specified in the On clause which are not - // bound by any tuple id (e.g. ON (true)) + // Note: Constant predicates in the On clause (e.g. ON TRUE, ON FALSE) are now + // handled correctly through canEvalPredicate() and canEvalAntiJoinedConjunct(). List tblRefIds = Lists.newArrayList(outer.getTblRefIds()); tblRefIds.addAll(inner.getTblRefIds()); otherJoinConjuncts = analyzer.getUnassignedConjuncts(tblRefIds, false); diff --git a/testdata/workloads/functional-query/queries/QueryTest/semi-joins.test b/testdata/workloads/functional-query/queries/QueryTest/semi-joins.test index 9f497b7e21..bc51ef5fd0 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/semi-joins.test +++ b/testdata/workloads/functional-query/queries/QueryTest/semi-joins.test @@ -346,3 +346,190 @@ RIGHT ANTI JOIN SemiJoinTblB b on v.b = b.b ---- TYPES INT, INT, INT ==== +---- QUERY +# Testing LEFT ANTI JOIN with ON FALSE should return all rows from left table +# because no rows from right table can match. +SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON false +---- RESULTS +1,1,1 +1,1,10 +1,2,10 +1,3,10 +NULL,NULL,30 +2,4,30 +2,NULL,20 +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing RIGHT ANTI JOIN with ON FALSE should return all rows from right table +# because no rows from left table can match. 
+SELECT b.* FROM SemiJoinTblA a RIGHT ANTI JOIN SemiJoinTblB b ON false +---- RESULTS +1,1,1 +1,1,10 +1,2,5 +1,NULL,10 +2,10,NULL +3,NULL,NULL +3,NULL,50 +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing LEFT ANTI JOIN with ON TRUE should return empty result set +# because all rows from right table match. +SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON true +---- RESULTS +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing RIGHT ANTI JOIN with ON TRUE should return empty result set +# because all rows from left table match. +SELECT b.* FROM SemiJoinTblA a RIGHT ANTI JOIN SemiJoinTblB b ON true +---- RESULTS +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing LEFT ANTI JOIN with constant expression 1=0 (equivalent to FALSE) +SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON 1=0 +---- RESULTS +1,1,1 +1,1,10 +1,2,10 +1,3,10 +NULL,NULL,30 +2,4,30 +2,NULL,20 +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing LEFT ANTI JOIN with constant expression 1=1 (equivalent to TRUE) +SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON 1=1 +---- RESULTS +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing LEFT ANTI JOIN with ON FALSE and WHERE clause +# Should return rows from left table that satisfy WHERE condition +SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON false +WHERE a.a = 1 +---- RESULTS +1,1,1 +1,1,10 +1,2,10 +1,3,10 +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing LEFT ANTI JOIN with ON TRUE and WHERE clause +# Should return empty result (anti-join filters all, then WHERE is applied) +SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON true +WHERE a.a = 1 +---- RESULTS +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing LEFT SEMI JOIN with ON FALSE should return empty result +# because no rows from right table can match. 
+SELECT a.* FROM SemiJoinTblA a LEFT SEMI JOIN SemiJoinTblB b ON false +---- RESULTS +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing LEFT SEMI JOIN with ON TRUE should return all rows from left table +SELECT a.* FROM SemiJoinTblA a LEFT SEMI JOIN SemiJoinTblB b ON true +---- RESULTS +1,1,1 +1,1,10 +1,2,10 +1,3,10 +NULL,NULL,30 +2,4,30 +2,NULL,20 +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing nested query with LEFT ANTI JOIN ON FALSE +SELECT * FROM ( + SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON false +) v WHERE v.a = 2 +---- RESULTS +2,4,30 +2,NULL,20 +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing multiple anti-joins with constant predicates +SELECT a.* FROM SemiJoinTblA a +LEFT ANTI JOIN SemiJoinTblB b ON false +LEFT ANTI JOIN SemiJoinTblB c ON true +---- RESULTS +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing anti-join with mixed constant and regular predicates (ON FALSE OR a.a = b.a) +# The OR with FALSE should be optimized away, leaving just a.a = b.a +# Only rows with NULL in column a are returned because NULL doesn't match anything +SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON (false OR a.a = b.a) +---- RESULTS +NULL,NULL,30 +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing anti-join with mixed constant and regular predicates (ON TRUE AND a.a = b.a) +# The AND with TRUE should be optimized away, leaving just a.a = b.a +# Only rows with NULL in column a are returned because NULL doesn't match anything +SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON (true AND a.a = b.a) +---- RESULTS +NULL,NULL,30 +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing anti-join with inline view on the left side and ON FALSE +# The inline view should not be marked as having an empty result set +SELECT v.* FROM (SELECT * FROM SemiJoinTblA WHERE a = 1) v +LEFT ANTI JOIN SemiJoinTblB b ON false +---- RESULTS +1,1,1 +1,1,10 +1,2,10 +1,3,10 +---- TYPES +INT, INT, INT +==== +---- QUERY +# 
Testing nested inline views with anti-join and ON FALSE +# Verifies that constant predicates don't propagate incorrectly through nested views +SELECT * FROM ( + SELECT v.* FROM (SELECT * FROM SemiJoinTblA WHERE a = 2) v + LEFT ANTI JOIN SemiJoinTblB b ON false +) v2 WHERE v2.a = 2 +---- RESULTS +2,4,30 +2,NULL,20 +---- TYPES +INT, INT, INT +==== +---- QUERY +# Testing anti-join with inline view and mixed predicates +# Constant FALSE should not be migrated into the inline view +SELECT v.* FROM (SELECT * FROM SemiJoinTblA WHERE a = 1) v +LEFT ANTI JOIN SemiJoinTblB b ON (false OR v.a = b.a) +---- RESULTS +---- TYPES +INT, INT, INT +==== +