From 545d01ea2c1e6d588b24d9080cf44a4bb4aee2f1 Mon Sep 17 00:00:00 2001 From: pavle-martinovic_data Date: Tue, 18 Mar 2025 15:00:59 +0100 Subject: [PATCH 1/3] [MINOR][SQL] Add Cross Join as legal in recursion of Recursive CTE --- .../apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala index 2b2317833d29c..4627170819dbd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.SubqueryExpression -import org.apache.spark.sql.catalyst.plans.{Inner, LeftAnti, LeftOuter, LeftSemi, RightOuter} +import org.apache.spark.sql.catalyst.plans.{Cross, Inner, LeftAnti, LeftOuter, LeftSemi, RightOuter} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.{CTE, PLAN_EXPRESSION} @@ -214,6 +214,9 @@ object ResolveWithCTE extends Rule[LogicalPlan] { case Join(left, right, Inner, _, _) => checkIfSelfReferenceIsPlacedCorrectly(left, cteId, allowRecursiveRef) checkIfSelfReferenceIsPlacedCorrectly(right, cteId, allowRecursiveRef) + case Join(left, right, Cross, _, _) => + checkIfSelfReferenceIsPlacedCorrectly(left, cteId, allowRecursiveRef) + checkIfSelfReferenceIsPlacedCorrectly(right, cteId, allowRecursiveRef) case Join(left, right, LeftOuter, _, _) => checkIfSelfReferenceIsPlacedCorrectly(left, cteId, allowRecursiveRef) checkIfSelfReferenceIsPlacedCorrectly(right, cteId, allowRecursiveRef = false) From 8504d26fcfa538d824b637e492f96e00d834de3c Mon Sep 17 00:00:00 2001 From: pavle-martinovic_data Date: Fri, 21 Mar 2025 15:38:36 +0100 Subject: [PATCH 2/3] Add test for CROSS JOIN in recursive CTE --- .../analyzer-results/cte-recursion.sql.out | 52 +++++++++ .../sql-tests/inputs/cte-recursion.sql | 19 +++- .../sql-tests/results/cte-recursion.sql.out | 101 ++++++++++++++++++ 3 files changed, 171 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out index 8096d06c47340..42baaa8bb82b5 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out @@ -1167,3 +1167,55 @@ WithCTE +- Project [a#x] +- SubqueryAlias t1 +- CTERelationRef xxxx, true, [a#x, b#x, c#x], false, false + + +-- !query +CREATE TABLE tb ( + next INT +) +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`tb`, false + + +-- !query +INSERT INTO tb VALUES (0), (1) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/tb, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/tb], Append, `spark_catalog`.`default`.`tb`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/tb), [next] ++- Project [cast(col1#x as int) AS next#x] + +- LocalRelation [col1#x] + + +-- !query +WITH RECURSIVE t(n) AS ( + SELECT 1 + UNION ALL + SELECT next FROM t CROSS JOIN tb + ) +SELECT * FROM t LIMIT 63 +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t +: +- Project [1#x AS n#x] +: +- UnionLoop xxxx +: :- Project [1 AS 1#x] +: : +- OneRowRelation +: +- Project [next#x] +: +- Join Cross +: :- SubqueryAlias t +: : +- Project [1#x AS n#x] +: : +- UnionLoopRef xxxx, [1#x], false +: +- SubqueryAlias spark_catalog.default.tb +: +- Relation spark_catalog.default.tb[next#x] parquet ++- GlobalLimit 63 + +- LocalLimit 63 + +- Project [n#x] + +- SubqueryAlias t + +- CTERelationRef xxxx, true, [n#x], false, false + + +-- !query +DROP TABLE tb +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.tb diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql index b32c6f38ea1fd..785162d60ed3e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql @@ -462,4 +462,21 @@ WITH RECURSIVE t1(a,b,c) AS ( SELECT 1,1,1 UNION ALL SELECT a+1,a+1,a+1 FROM t1) -SELECT a FROM t1 LIMIT 5; \ No newline at end of file +SELECT a FROM t1 LIMIT 5; + +-- CROSS JOIN example +CREATE TABLE tb ( + next INT +); + +INSERT INTO tb VALUES (0), (1); + +-- create +WITH RECURSIVE t(n) AS ( + SELECT 1 + UNION ALL + SELECT next FROM t CROSS JOIN tb + ) +SELECT * FROM t LIMIT 63; + +DROP TABLE tb; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out index 5f83ec7c1a85e..a2f0ed8436035 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out @@ -1050,3 +1050,104 @@ struct 3 4 5 + + +-- !query +CREATE TABLE tb ( + next INT +) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO tb VALUES (0), (1) +-- !query schema +struct<> +-- !query output + + + +-- !query +WITH RECURSIVE t(n) AS ( + SELECT 1 + UNION ALL + SELECT next FROM t CROSS JOIN tb + ) +SELECT * FROM t LIMIT 63 +-- !query schema +struct +-- !query output +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 + + +-- !query +DROP TABLE tb +-- !query schema +struct<> +-- !query output + From 87797e10c89b220d0850af178636bb40a03b3c2e Mon Sep 17 00:00:00 2001 From: pavle-martinovic_data Date: Fri, 4 Apr 2025 10:49:44 +0200 Subject: [PATCH 3/3] Add Peter's test to golden file and fix formatting --- .../analyzer-results/cte-recursion.sql.out | 43 +++++++++++++++++-- .../sql-tests/inputs/cte-recursion.sql | 17 +++++--- .../sql-tests/results/cte-recursion.sql.out | 33 ++++++++++++-- 3 files changed, 82 insertions(+), 11 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out index 42baaa8bb82b5..88f3f675c87b2 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out @@ -1170,9 +1170,7 @@ WithCTE -- !query -CREATE TABLE tb ( - next INT -) +CREATE TABLE tb (next INT) -- !query analysis CreateDataSourceTableCommand `spark_catalog`.`default`.`tb`, false @@ -1219,3 +1217,42 @@ DROP TABLE tb -- !query analysis DropTable false, false +- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.tb + + +-- !query +WITH RECURSIVE + x(id) AS (SELECT 1 UNION SELECT 2), + t(id, xid) AS ( + SELECT 0 AS id, 0 AS xid + UNION ALL + SELECT t.id + 1, xid * 10 + x.id FROM t CROSS JOIN x WHERE t.id < 3 + ) +SELECT * FROM t +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias x +: +- Project [1#x AS id#x] +: +- Distinct +: +- Union false, false +: :- Project [1 AS 1#x] +: : +- OneRowRelation +: +- Project [2 AS 2#x] +: +- OneRowRelation +:- CTERelationDef xxxx, false +: +- SubqueryAlias t +: +- Project [id#x AS id#x, xid#x AS xid#x] +: +- UnionLoop xxxx +: :- Project [0 AS id#x, 0 AS xid#x] +: : +- OneRowRelation +: +- Project [(id#x + 1) AS (id + 1)#x, ((xid#x * 10) + id#x) AS ((xid * 10) + id)#x] +: +- Filter (id#x < 3) +: +- Join Cross +: :- SubqueryAlias t +: : +- Project [id#x AS id#x, xid#x AS xid#x] +: : +- UnionLoopRef xxxx, [id#x, xid#x], false +: +- SubqueryAlias x +: +- CTERelationRef xxxx, true, [id#x], false, false, 2 ++- Project [id#x, xid#x] + +- SubqueryAlias t + +- CTERelationRef xxxx, true, [id#x, xid#x], false, false diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql index 785162d60ed3e..2780462478b05 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql @@ -465,13 +465,10 @@ WITH RECURSIVE t1(a,b,c) AS ( SELECT a FROM t1 LIMIT 5; -- CROSS JOIN example -CREATE TABLE tb ( - next INT -); +CREATE TABLE tb (next INT); INSERT INTO tb VALUES (0), (1); --- create WITH RECURSIVE t(n) AS ( SELECT 1 UNION ALL @@ -479,4 +476,14 @@ WITH RECURSIVE t(n) AS ( ) SELECT * FROM t LIMIT 63; -DROP TABLE tb; \ No newline at end of file +DROP TABLE tb; +-- CROSS JOIN example 2 + +WITH RECURSIVE + x(id) AS (SELECT 1 UNION SELECT 2), + t(id, xid) AS ( + SELECT 0 AS id, 0 AS xid + UNION ALL + SELECT t.id + 1, xid * 10 + x.id FROM t CROSS JOIN x WHERE t.id < 3 + ) +SELECT * FROM t \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out index a2f0ed8436035..90762f81fd51b 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out @@ -1053,9 +1053,7 @@ struct -- !query -CREATE TABLE tb ( - next INT -) +CREATE TABLE tb (next INT) -- !query schema struct<> -- !query output @@ -1151,3 +1149,32 @@ DROP TABLE tb struct<> -- !query output + + +-- !query +WITH RECURSIVE + x(id) AS (SELECT 1 UNION SELECT 2), + t(id, xid) AS ( + SELECT 0 AS id, 0 AS xid + UNION ALL + SELECT t.id + 1, xid * 10 + x.id FROM t CROSS JOIN x WHERE t.id < 3 + ) +SELECT * FROM t +-- !query schema +struct +-- !query output +0 0 +1 1 +1 2 +2 11 +2 12 +2 21 +2 22 +3 111 +3 112 +3 121 +3 122 +3 211 +3 212 +3 221 +3 222