From f9ca519897d5098ddf08299ef1331f4d34d8a2b8 Mon Sep 17 00:00:00 2001
From: Andy Lam
Date: Fri, 5 Jan 2024 11:37:20 +0800
Subject: [PATCH] [SPARK-46179][SQL] Add CrossDbmsQueryTestSuite, which runs golden file query tests against other DBMS, starting with Postgres

### What changes were proposed in this pull request?

Create `CrossDbmsQueryTestSuite`, which extends `SQLQueryTestHelper` and `DockerJDBCIntegrationSuite`. `CrossDbmsQueryTestSuite` is a trait that allows testing golden files against other DBMS. `PostgreSQLQueryTestSuite` is an implementation of `CrossDbmsQueryTestSuite`.

For starters, SQL files in the subquery sql-tests are automatically opted into this test. In this PR, all files except `exists-having.sql` are opted out; otherwise this PR would have 10K+ lines of changes (I would like to do that in the next PR, if possible). I had to change the view-creation syntax in `exists-having.sql` slightly, and this is reflected in the `analyzer-results` file, but crucially, the query output (in the `results` file) is not changed.

Note that this will not be applicable to many of the current SQL tests we have due to:
- Incompatible SQL syntax between Spark SQL and Postgres.
- Incompatible data types.
- Differences in numerical precision with doubles.
- Missing functions in either system.
- Test files with specific configs, such as ANSI mode, the count bug, etc.

### Why are the changes needed?

For correctness checking of our SQLQueryTestSuites, we want to run them with Postgres as a reference DBMS. This can easily be extended to other DBMS.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

This is a test-only PR and does not affect system behavior.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #44084 from andylam-db/crossdbms.
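To illustrate the extensibility claim above: a suite for another DBMS only needs a database name, an input-file scope, and a Docker image definition. The sketch below is not part of this patch; the MySQL image name, environment variables, and JDBC URL are hypothetical, and it assumes the same `DatabaseOnDocker` fields that `PostgreSQLQueryTestSuite` in this PR uses.

```scala
// Hypothetical sketch only -- not included in this PR.
// Assumes a MySQL JDBC driver is on the test classpath and that no dialect-specific
// setup (like the Postgres `double` helper function below) is needed.
package org.apache.spark.sql.jdbc

import java.io.File
import java.sql.Connection

import org.apache.spark.tags.DockerTest

@DockerTest
class MySQLQueryTestSuite extends CrossDbmsQueryTestSuite {

  // Would also be registered as a constant in the CrossDbmsQueryTestSuite companion object.
  val DATABASE_NAME = "mysql"
  // Scope to the same subquery directory the Postgres suite uses.
  protected val customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath

  override val db = new DatabaseOnDocker {
    override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:8.0")
    override val env = Map("MYSQL_ROOT_PASSWORD" -> "rootpass")
    override val usesIpc = false
    override val jdbcPort = 3306

    override def getJdbcUrl(ip: String, port: Int): String =
      s"jdbc:mysql://$ip:$port/mysql?user=root&password=rootpass"
  }

  // No extra helper functions are created for this sketch.
  override def dataPreparation(conn: Connection): Unit = {}

  listTestCases.foreach(createScalaTestCase)
}
```

An input file that is incompatible with such a suite would opt out via the `--ONLY_IF spark` comment described in the trait's documentation below.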
Authored-by: Andy Lam Signed-off-by: Wenchen Fan --- .../querytest/CrossDbmsQueryTestSuite.scala | 183 +++++++++++ .../querytest/PostgreSQLQueryTestSuite.scala | 73 +++++ .../exists-subquery/exists-having.sql.out | 206 +++++------- .../exists-subquery/exists-aggregate.sql | 1 + .../subquery/exists-subquery/exists-basic.sql | 1 + .../exists-subquery/exists-count-bug.sql | 1 + .../subquery/exists-subquery/exists-cte.sql | 1 + .../exists-subquery/exists-having.sql | 55 ++-- .../exists-in-join-condition.sql | 1 + .../exists-joins-and-set-ops.sql | 1 + .../exists-subquery/exists-orderby-limit.sql | 1 + .../exists-subquery/exists-outside-filter.sql | 1 + .../exists-subquery/exists-within-and-or.sql | 1 + .../inputs/subquery/in-subquery/in-basic.sql | 1 + .../subquery/in-subquery/in-count-bug.sql | 1 + .../subquery/in-subquery/in-group-by.sql | 1 + .../inputs/subquery/in-subquery/in-having.sql | 1 + .../inputs/subquery/in-subquery/in-joins.sql | 1 + .../inputs/subquery/in-subquery/in-limit.sql | 1 + .../in-subquery/in-multiple-columns.sql | 1 + .../in-subquery/in-null-semantics.sql | 1 + .../subquery/in-subquery/in-nullability.sql | 1 + .../subquery/in-subquery/in-order-by.sql | 1 + .../in-subquery/in-set-operations.sql | 1 + .../in-subquery-in-join-condition.sql | 1 + .../subquery/in-subquery/in-with-cte.sql | 1 + .../subquery/in-subquery/nested-not-in.sql | 1 + .../subquery/in-subquery/not-in-group-by.sql | 1 + .../subquery/in-subquery/not-in-joins.sql | 1 + ...not-in-unit-tests-multi-column-literal.sql | 1 + .../not-in-unit-tests-multi-column.sql | 1 + ...ot-in-unit-tests-single-column-literal.sql | 1 + .../not-in-unit-tests-single-column.sql | 1 + .../inputs/subquery/in-subquery/simple-in.sql | 1 + .../negative-cases/invalid-correlation.sql | 1 + .../negative-cases/subq-input-typecheck.sql | 1 + .../nested-scalar-subquery-count-bug.sql | 1 + .../scalar-subquery-count-bug.sql | 1 + .../scalar-subquery-predicate.sql | 1 + .../scalar-subquery-select.sql | 1 + .../scalar-subquery-set-op.sql | 1 + .../inputs/subquery/subquery-in-from.sql | 1 + .../inputs/subquery/subquery-offset.sql | 1 + .../exists-subquery/exists-having.sql.out | 55 ++-- .../apache/spark/sql/SQLQueryTestHelper.scala | 307 ++++++++++++++++++ .../apache/spark/sql/SQLQueryTestSuite.scala | 299 +---------------- 46 files changed, 738 insertions(+), 479 deletions(-) create mode 100644 connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestSuite.scala create mode 100644 connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestSuite.scala new file mode 100644 index 0000000000000..a693920c10032 --- /dev/null +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestSuite.scala @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.jdbc + +import java.io.File +import java.sql.ResultSet + +import scala.collection.mutable.ArrayBuffer +import scala.util.control.NonFatal + +import org.apache.spark.sql.Row +import org.apache.spark.sql.SQLQueryTestHelper +import org.apache.spark.sql.catalyst.util.fileToString + +/** + * This suite builds on the SQL golden file test framework to run other DBMS against the SQL test + * golden files (which SQLQueryTestSuite generates and tests against) to cross-check correctness. + * Note that this is not currently run on all SQL input files by default because there is + * incompatibility between SQL dialects for Spark and the other DBMS. + * + * This suite adds a new comment argument, --ONLY_IF. This comment indicates the DBMS for which the + * SQL file is eligible. These strings are defined in the companion object. For + * example, if you have a SQL file named `describe.sql`, and you want to indicate that Postgres is + * incompatible, add the following comment into the input file: + * --ONLY_IF spark + */ +trait CrossDbmsQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper { + + val DATABASE_NAME: String + + protected val baseResourcePath = { + // We use a path based on Spark home for 2 reasons: + // 1. Maven can't get correct resource directory when resources in other jars. + // 2. We test subclasses in the hive-thriftserver module. + getWorkspaceFilePath("sql", "core", "src", "test", "resources", "sql-tests").toFile + } + protected val inputFilePath = new File(baseResourcePath, "inputs").getAbsolutePath + protected val customInputFilePath: String + protected val goldenFilePath = new File(baseResourcePath, "results").getAbsolutePath + + protected def listTestCases: Seq[TestCase] = { + listFilesRecursively(new File(customInputFilePath)).flatMap { file => + val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + val absPath = file.getAbsolutePath + val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator) + RegularTestCase(testCaseName, absPath, resultFile) :: Nil + }.sortBy(_.name) + } + + def createScalaTestCase(testCase: TestCase): Unit = { + testCase match { + case _: RegularTestCase => + // Create a test case to run this case. + test(testCase.name) { + runSqlTestCase(testCase, listTestCases) + } + case _ => + ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { + log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.") + } + } + } + + protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { + val input = fileToString(new File(testCase.inputFile)) + val (comments, code) = splitCommentsAndCodes(input) + val queries = getQueries(code, comments, listTestCases) + + val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) + .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) + // If `--ONLY_IF` is found, check if the DBMS being used is allowed.
+ if (dbmsConfig.nonEmpty && !dbmsConfig.contains(DATABASE_NAME)) { + log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + + s"not eligible with $DATABASE_NAME.") + } else { + runQueriesAndCheckAgainstGoldenFile(queries, testCase) + } + } + + protected def runQueriesAndCheckAgainstGoldenFile( + queries: Seq[String], testCase: TestCase): Unit = { + // The local Spark session is needed because we use Spark analyzed plan to check if the query + // result is already semantically sorted, below. + val localSparkSession = spark.newSession() + val conn = getConnection() + val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) + + val outputs: Seq[QueryTestOutput] = queries.map { sql => + val output = { + try { + val sparkDf = localSparkSession.sql(sql) + val isResultSet = stmt.execute(sql) + val rows = ArrayBuffer[Row]() + if (isResultSet) { + val rs = stmt.getResultSet + val metadata = rs.getMetaData + while (rs.next()) { + val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => { + val value = rs.getObject(i) + if (value == null) { + "NULL" + } else { + value + } + })) + rows.append(row) + } + } + val output = rows.map(_.mkString("\t")).toSeq + if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + output + } else { + // Sort the answer manually if it isn't sorted. + output.sorted + } + } catch { + case NonFatal(e) => Seq(e.getClass.getName, e.getMessage) + } + } + + ExecutionOutput( + sql = sql, + // Don't care about the schema for this test. Only care about correctness. + schema = None, + output = output.mkString("\n")) + } + conn.close() + + // Read back the golden files. + var curSegment = 0 + val expectedOutputs: Seq[QueryTestOutput] = { + val goldenOutput = fileToString(new File(testCase.resultFile)) + val segments = goldenOutput.split("-- !query.*\n") + outputs.map { output => + val result = + ExecutionOutput( + segments(curSegment + 1).trim, // SQL + None, // Schema + normalizeTestResults(segments(curSegment + 3))) // Output + // Assume that the golden file always has all 3 segments. + curSegment += 3 + result + } + } + + // Compare results. + assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") { + outputs.size + } + + outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) => + assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") { + output.sql + } + assertResult(expected.output, s"Result did not match" + + s" for query #$i\n${expected.sql}") { + output.output + } + } + } + +} + +object CrossDbmsQueryTestSuite { + + final val POSTGRES = "postgres" + // Argument in input files to indicate that the sql file is restricted to certain systems. + final val ONLY_IF_ARG = "--ONLY_IF " +} diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala new file mode 100644 index 0000000000000..16ca831c56f2f --- /dev/null +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.jdbc + +import java.io.File +import java.sql.Connection + +import org.apache.spark.tags.DockerTest + +/** + * READ THIS IF YOU ADDED A NEW SQL TEST AND THIS SUITE IS FAILING: + * Your new SQL test is automatically opted into this suite. It is likely failing because it is not + * compatible with the default Postgres. You have two options: + * 1. (Recommended) Modify your queries to be compatible with both systems. This is recommended + * because it will run your queries against postgres, providing higher correctness testing + * confidence, and you won't have to manually verify the golden files generated with your test. + * 2. Add this line to your .sql file: --ONLY_IF spark + * + * Note: To run this test suite for a specific version (e.g., postgres:15.1): + * {{{ + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:15.1 + * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.PostgreSQLQueryTestSuite" + * }}} + */ +@DockerTest +class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { + + val DATABASE_NAME = CrossDbmsQueryTestSuite.POSTGRES + // Scope to only subquery directory for now. + protected val customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath + + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:15.1-alpine") + override val env = Map( + "POSTGRES_PASSWORD" -> "rootpass" + ) + override val usesIpc = false + override val jdbcPort = 5432 + + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" + } + + override def dataPreparation(conn: Connection): Unit = { + conn.prepareStatement( + // Custom function `double` to imitate Spark's function, so that more tests are covered. 
+ """ + |CREATE OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision + | AS 'select CAST($1 AS double precision);' + | LANGUAGE SQL + | IMMUTABLE + | RETURNS NULL ON NULL INPUT; + |""".stripMargin + ).executeUpdate() + } + + listTestCases.foreach(createScalaTestCase) +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out index ae81d8edc2625..b053cb369adca 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out @@ -1,81 +1,69 @@ -- Automatically generated by SQLQueryTestSuite -- !query -CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id) +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70) -- !query analysis -CreateViewCommand `EMP`, SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +CreateViewCommand `EMP`, [(id,None), (emp_name,None), (hiredate,None), (salary,None), (dept_id,None)], VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70), false, false, LocalTempView, true + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] -- !query -CREATE TEMPORARY VIEW DEPT 
AS SELECT * FROM VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state) +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL') -- !query analysis -CreateViewCommand `DEPT`, SELECT * FROM VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true - +- Project [dept_id#x, dept_name#x, state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] +CreateViewCommand `DEPT`, [(dept_id,None), (dept_name,None), (state,None)], VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL'), false, false, LocalTempView, true + +- LocalRelation [col1#x, col2#x, col3#x] -- !query -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt) +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00)) -- !query analysis -CreateViewCommand `BONUS`, SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true - +- Project [emp_name#x, bonus_amt#x] - +- SubqueryAlias BONUS - +- LocalRelation [emp_name#x, bonus_amt#x] +CreateViewCommand `BONUS`, [(emp_name,None), (bonus_amt,None)], VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00)), false, false, LocalTempView, true + +- LocalRelation [col1#x, col2#x] -- !query @@ -92,17 +80,13 @@ Project [dept_id#x, count(1)#xL] : +- Filter (bonus_amt#x < outer(min(salary#x)#x)) : +- SubqueryAlias bonus : +- View (`BONUS`, [emp_name#x, bonus_amt#x]) - : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : +- Project [emp_name#x, bonus_amt#x] - : +- SubqueryAlias BONUS - : +- LocalRelation [emp_name#x, bonus_amt#x] + : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : +- LocalRelation [col1#x, col2#x] +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] +- SubqueryAlias emp +- View (`EMP`, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x]) - +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - +- Project [id#x, 
emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] -- !query @@ -124,23 +108,17 @@ Project [dept_id#x, dept_name#x, state#x] : : +- Filter (bonus_amt#x < outer(min(salary#x)#x)) : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x, bonus_amt#x]) - : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- Project [emp_name#x, bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : : +- LocalRelation [col1#x, col2#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias emp : +- View (`EMP`, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x]) - : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x, dept_name#x, state#x]) - +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- Project [dept_id#x, dept_name#x, state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- Project [cast(col1#x as int) AS dept_id#x, cast(col2#x as string) AS dept_name#x, cast(col3#x as string) AS state#x] + +- LocalRelation [col1#x, col2#x, col3#x] -- !query @@ -164,25 +142,19 @@ Aggregate [dept_id#x], [dept_id#x, max(salary#x) AS max(salary)#x] : : +- Filter (bonus_amt#x < outer(min(salary#x)#x)) : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x, bonus_amt#x]) - : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- Project [emp_name#x, bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : : +- LocalRelation [col1#x, col2#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias p : +- SubqueryAlias emp : +- View (`EMP`, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x]) - : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as 
double) AS salary#x, cast(col5#x as int) AS dept_id#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] +- SubqueryAlias gp +- SubqueryAlias emp +- View (`EMP`, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x]) - +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] -- !query @@ -204,23 +176,17 @@ Project [dept_id#x, dept_name#x, state#x] : : +- Filter (bonus_amt#x > outer(min(salary#x)#x)) : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x, bonus_amt#x]) - : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- Project [emp_name#x, bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : : +- LocalRelation [col1#x, col2#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias emp : +- View (`EMP`, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x]) - : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x, dept_name#x, state#x]) - +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- Project [dept_id#x, dept_name#x, state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- Project [cast(col1#x as int) AS dept_id#x, cast(col2#x as string) AS dept_name#x, cast(col3#x as string) AS state#x] + +- LocalRelation [col1#x, col2#x, col3#x] -- !query @@ -244,21 +210,15 @@ Project [dept_id#x, dept_name#x, state#x] : : +- Filter ((bonus_amt#x > outer(min(salary#x)#x)) AND (outer(count(dept_id)#xL) > cast(1 as bigint))) : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x, bonus_amt#x]) - : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- Project [emp_name#x, bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : : +- LocalRelation [col1#x, col2#x] : +- Aggregate [dept_id#x], [dept_id#x, count(dept_id#x) AS count(dept_id)#xL, min(salary#x) AS min(salary#x)#x] : +- Filter (outer(dept_id#x) = dept_id#x) : +- SubqueryAlias emp : +- View (`EMP`, [id#x, emp_name#x, hiredate#x, salary#x, 
dept_id#x]) - : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x, dept_name#x, state#x]) - +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- Project [dept_id#x, dept_name#x, state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- Project [cast(col1#x as int) AS dept_id#x, cast(col2#x as string) AS dept_name#x, cast(col3#x as string) AS state#x] + +- LocalRelation [col1#x, col2#x, col3#x] diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql index 17672f9738f1c..9797acc5561bb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql @@ -4,6 +4,7 @@ --CONFIG_DIM1 spark.sql.codegen.wholeStage=true --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql index 332e858800f7c..4055f798ad85d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql @@ -1,5 +1,6 @@ -- Tests EXISTS subquery support. 
Tests basic form -- of EXISTS subquery (both EXISTS and NOT EXISTS) +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-count-bug.sql index 3075fef70ad98..2c2799ce432b1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-count-bug.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark create temporary view t1(c1, c2) as values (0, 1), (1, 2); create temporary view t2(c1, c2) as values (0, 2), (0, 3); create temporary view t3(c1, c2) as values (0, 3), (1, 4), (2, 5); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-cte.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-cte.sql index c6784838158e1..ea8c0fc36ccfc 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-cte.sql @@ -1,5 +1,6 @@ -- Tests EXISTS subquery used along with -- Common Table Expressions(CTE) +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql index c30159039ff3e..d172220fbc413 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql @@ -1,36 +1,33 @@ -- Tests HAVING clause in subquery. 
-CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id); +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70); -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state); +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL'); -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt); +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00)); -- simple having in subquery. -- TC.01.01 diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql index 5cff53d33816e..ad2e7ad563e08 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql @@ -6,6 +6,7 @@ -- 3. Join type: inner / left outer / right outer / full outer / left semi / left anti -- 4. 
AND or OR for the join condition +--ONLY_IF spark CREATE TEMP VIEW x(x1, x2) AS VALUES (2, 1), (1, 1), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql index 28cb2f74748ea..1542fa5149aaf 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql @@ -13,6 +13,7 @@ --CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY --CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql index d920a413ab8fc..2e3055db60afa 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql @@ -5,6 +5,7 @@ --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-outside-filter.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-outside-filter.sql index af75103797fe5..585de247b11c5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-outside-filter.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-outside-filter.sql @@ -1,5 +1,6 @@ -- Tests EXISTS subquery support where the subquery is used outside the WHERE clause. +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql index 7743b5241d11d..5920b61dade52 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql @@ -1,6 +1,7 @@ -- Tests EXISTS subquery support. Tests EXISTS -- subquery within a AND or OR expression. 
+--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-basic.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-basic.sql index 5669423148f80..fc243422bd7c8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-basic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-basic.sql @@ -1,5 +1,6 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view tab_a as select * from values (1, 1) as tab_a(a1, b1); create temporary view tab_b as select * from values (1, 1) as tab_b(a2, b2); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-count-bug.sql index 4840f02511b64..f07ead3e868bd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-count-bug.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark create temporary view t1(c1, c2) as values (0, 1), (1, 2); create temporary view t2(c1, c2) as values (0, 2), (0, 3); create temporary view t3(c1, c2) as values (0, 3), (1, 4), (2, 5); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql index 168faa0aee7c5..5c962dceb067f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql @@ -6,6 +6,7 @@ --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN +--ONLY_IF spark create temporary view t1 as select * from values ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-having.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-having.sql index 750cc42b8641c..4d11ea7005b3b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-having.sql @@ -4,6 +4,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql index 08eeb1d106fd5..d12144ae7e492 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql @@ -16,6 +16,7 @@ --CONFIG_DIM3 
spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM3 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql index 5f06f159f0b60..a76da33e501ca 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql @@ -4,6 +4,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-multiple-columns.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-multiple-columns.sql index 1a6c06f9dad49..c403c2d66ab1f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-multiple-columns.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-multiple-columns.sql @@ -4,6 +4,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-null-semantics.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-null-semantics.sql index aad655f05099e..7bf49a3209268 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-null-semantics.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-null-semantics.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark create temp view v (c) as values (1), (null); create temp view v_empty (e) as select 1 where false; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-nullability.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-nullability.sql index 07b4afe36c143..30f3a25f0e85a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-nullability.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-nullability.sql @@ -1,4 +1,5 @@ -- SPARK-43413: Tests for IN subquery nullability +--ONLY_IF spark create temp view t0 as select 1 as a_nonnullable; create temp view t1 as select cast(null as int) as b_nullable; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql index 0b006af4130a9..8bf49a1c2d996 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql @@ -8,6 
+8,7 @@ --CONFIG_DIM2 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM2 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql index e4a931ce2c800..c6b6a338c9b1d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql @@ -1,5 +1,6 @@ -- A test suite for set-operations in parent side, subquery, and both predicate subquery -- It includes correlated cases. +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql index d4fa2f6b0e816..d519abdbacc05 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql @@ -1,4 +1,5 @@ -- Test that correlated EXISTS subqueries in join conditions are supported. +--ONLY_IF spark -- Permutations of the test: -- 1. In / Not In diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-with-cte.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-with-cte.sql index fa4ae87f041cf..8d08cfb4da346 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-with-cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-with-cte.sql @@ -1,5 +1,6 @@ -- A test suite for in with cte in parent side, subquery, and both predicate subquery -- It includes correlated cases. +--ONLY_IF spark --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/nested-not-in.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/nested-not-in.sql index e2d4ad522d446..9472690d99143 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/nested-not-in.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/nested-not-in.sql @@ -1,4 +1,5 @@ -- Tests NOT-IN subqueries nested inside OR expression(s). +--ONLY_IF spark --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql index 54b74534c1162..97da00ac2cefd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql @@ -1,5 +1,6 @@ -- A test suite for NOT IN GROUP BY in parent side, subquery, and both predicate subquery -- It includes correlated cases. +--ONLY_IF spark -- Test aggregate operator with codegen on and off. 
--CONFIG_DIM1 spark.sql.codegen.wholeStage=true diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql index 2d11c5da20633..639034c18f31b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql @@ -1,5 +1,6 @@ -- A test suite for not-in-joins in parent side, subquery, and both predicate subquery -- It includes correlated cases. +--ONLY_IF spark --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql index a061e495f51b8..e9f3fda9c1b92 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql @@ -6,6 +6,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, null), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql index 28ab75121573a..d5a30950b7fe7 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql @@ -15,6 +15,7 @@ -- This can be generalized to include more tests for more columns, but it covers the main cases -- when there is more than one column. 
+--ONLY_IF spark --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql index 79747022eb1e8..2c3aed95a13ba 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql @@ -6,6 +6,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, 1.0), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql index 8060246bf3a3f..bbf5dce38a962 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql @@ -33,6 +33,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, 1.0), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql index d8a58afa344db..98f1f3d52cbb0 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql @@ -3,6 +3,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql index 1260fb73c6d81..0b83ecf43a301 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql @@ -1,5 +1,6 @@ -- The test file contains negative test cases -- of invalid queries where error messages are expected. +--ONLY_IF spark CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1, 2, 3) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql index 98ce1354a1355..da3ed9d11a8bd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql @@ -1,5 +1,6 @@ -- The test file contains negative test cases -- of invalid queries where error messages are expected. 
+--ONLY_IF spark CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1, 2, 3) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql index 86476389a8577..fe9152a27fe8d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark CREATE OR REPLACE VIEW t1(a1, a2) as values (0, 1), (1, 2); CREATE OR REPLACE VIEW t2(b1, b2) as values (0, 2), (0, 3); CREATE OR REPLACE VIEW t3(c1, c2) as values (0, 2), (0, 3); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql index 94b707ebf2002..c428dd81f287a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql @@ -1,6 +1,7 @@ --CONFIG_DIM1 spark.sql.optimizer.decorrelateInnerQuery.enabled=true --CONFIG_DIM1 spark.sql.optimizer.decorrelateInnerQuery.enabled=false +--ONLY_IF spark create temp view l (a, b) as values (1, 2.0), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql index d2d0ef7817178..20ba10176196f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql @@ -1,5 +1,6 @@ -- A test suite for scalar subquery in predicate context +--ONLY_IF spark CREATE OR REPLACE TEMPORARY VIEW p AS VALUES (1, 1) AS T(pk, pv); CREATE OR REPLACE TEMPORARY VIEW c AS VALUES (1, 1) AS T(ck, cv); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql index e4f7b25a1684d..ef1e612fd744a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql @@ -1,5 +1,6 @@ -- A test suite for scalar subquery in SELECT clause +--ONLY_IF spark create temporary view t1 as select * from values ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-set-op.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-set-op.sql index 39e456611c03b..d282e477678c9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-set-op.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-set-op.sql @@ -1,5 +1,6 @@ -- Set operations in correlation path +--ONLY_IF spark CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 
0); CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3); CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-in-from.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-in-from.sql index 1273b56b6344b..662a065cdc13a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-in-from.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-in-from.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark -- Aliased subqueries in FROM clause SELECT * FROM (SELECT * FROM testData) AS t WHERE key = 1; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-offset.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-offset.sql index 9b61d1b26270b..cc9c70df9df87 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-offset.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-offset.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark drop table if exists x; drop table if exists y; diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out index ae4cf010ffc98..2a84516e90abe 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out @@ -1,16 +1,15 @@ -- Automatically generated by SQLQueryTestSuite -- !query -CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id) +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70) -- !query schema struct<> -- !query output @@ -18,14 +17,13 @@ struct<> -- !query -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state) +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL') -- !query schema struct<> -- !query output @@ -33,16 +31,15 @@ struct<> -- !query -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - 
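The `--ONLY_IF spark` header added to the input files above gates a whole file to a single system: such files keep running in the regular Spark golden-file suites, while a cross-DBMS run (for example against Postgres) skips them. The CrossDbmsQueryTestSuite code that consumes the marker is not part of this hunk, so the following is only a hedged sketch of how it could be honored; the constant and helper names are illustrative, not the actual implementation.

// Hedged sketch, not the real CrossDbmsQueryTestSuite logic: decide whether a
// parsed input file should run on the given system based on its --ONLY_IF header.
val ONLY_IF_PREFIX = "--ONLY_IF "  // assumed directive prefix, as written in the .sql inputs

def runsOnSystem(comments: Array[String], system: String): Boolean = {
  val onlyIf = comments.filter(_.startsWith(ONLY_IF_PREFIX)).map(_.stripPrefix(ONLY_IF_PREFIX).trim)
  // No --ONLY_IF header: the file is eligible everywhere.
  // Otherwise it only runs on the named system, e.g. "spark".
  onlyIf.isEmpty || onlyIf.contains(system)
}

// A Postgres-backed run would then skip every file tagged "--ONLY_IF spark",
// since runsOnSystem(comments, "postgres") is false for such files.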
("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt) +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00)) -- !query schema struct<> -- !query output diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index c08569150e2a9..38e004e0b7209 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -17,19 +17,25 @@ package org.apache.spark.sql +import java.io.File + +import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal import org.apache.spark.{SparkException, SparkThrowable} import org.apache.spark.ErrorMessageFormat.MINIMAL import org.apache.spark.SparkThrowableHelper.getMessage import org.apache.spark.internal.Logging +import org.apache.spark.sql.IntegratedUDFTestUtils.{TestUDF, TestUDTFSet} import org.apache.spark.sql.catalyst.expressions.{CurrentDate, CurrentTimestampLike, CurrentUser, Literal} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.util.fileToString import org.apache.spark.sql.execution.HiveResult.hiveResultString import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} import org.apache.spark.sql.types.{DateType, StructType, TimestampType} +import org.apache.spark.util.ArrayImplicits.SparkArrayOps trait SQLQueryTestHelper extends Logging { @@ -164,4 +170,305 @@ trait SQLQueryTestHelper extends Logging { (emptySchema, Seq(e.getClass.getName, e.getMessage)) } } + + /** A test case. */ + protected trait TestCase { + val name: String + val inputFile: String + val resultFile: String + def asAnalyzerTest(newName: String, newResultFile: String): TestCase + } + + /** + * traits that indicate UDF or PgSQL to trigger the code path specific to each. For instance, + * PgSQL tests require to register some UDF functions. + */ + protected trait PgSQLTest + + /** Trait that indicates ANSI-related tests with the ANSI mode enabled. */ + protected trait AnsiTest + + /** Trait that indicates an analyzer test that shows the analyzed plan string as output. */ + protected trait AnalyzerTest extends TestCase { + override def asAnalyzerTest(newName: String, newResultFile: String): AnalyzerTest = this + } + + /** Trait that indicates the default timestamp type is TimestampNTZType. */ + protected trait TimestampNTZTest + + /** Trait that indicates CTE test cases need their create view versions */ + protected trait CTETest + + protected trait UDFTest { + val udf: TestUDF + } + + protected trait UDTFSetTest { + val udtfSet: TestUDTFSet + } + + protected case class RegularTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + RegularAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** An ANSI-related test case. 
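Each concrete test case moved into SQLQueryTestHelper carries an asAnalyzerTest factory so a suite can derive an analyzer-only twin that reuses the same input file but writes analyzed plans to a separate golden file. A minimal sketch of that derivation follows; the file paths and the "_analyzer" suffix are illustrative values, not taken from this patch.

// Hedged sketch: deriving the analyzer-only variant of a regular test case.
val base = RegularTestCase(
  name = "subquery/exists-subquery/exists-having",
  inputFile = "sql-tests/inputs/subquery/exists-subquery/exists-having.sql",
  resultFile = "sql-tests/results/subquery/exists-subquery/exists-having.sql.out")

// Same input file, but results go to the analyzer-results golden file.
val analyzerTwin = base.asAnalyzerTest(
  newName = s"${base.name}_analyzer",
  newResultFile = "sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out")

// For a case that is already an AnalyzerTest, the trait's default
// implementation of asAnalyzerTest simply returns `this`.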
*/ + protected case class AnsiTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with AnsiTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + AnsiAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** An analyzer test that shows the analyzed plan string as output. */ + protected case class AnalyzerTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with AnalyzerTest + + /** A PostgreSQL test case. */ + protected case class PgSQLTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with PgSQLTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + PgSQLAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** A UDF test case. */ + protected case class UDFTestCase( + name: String, + inputFile: String, + resultFile: String, + udf: TestUDF) extends TestCase with UDFTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + UDFAnalyzerTestCase(newName, inputFile, newResultFile, udf) + } + + protected case class UDTFSetTestCase( + name: String, + inputFile: String, + resultFile: String, + udtfSet: TestUDTFSet) extends TestCase with UDTFSetTest { + + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + UDTFSetAnalyzerTestCase(newName, inputFile, newResultFile, udtfSet) + } + + /** A UDAF test case. */ + protected case class UDAFTestCase( + name: String, + inputFile: String, + resultFile: String, + udf: TestUDF) extends TestCase with UDFTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + UDAFAnalyzerTestCase(newName, inputFile, newResultFile, udf) + } + + /** A UDF PostgreSQL test case. */ + protected case class UDFPgSQLTestCase( + name: String, + inputFile: String, + resultFile: String, + udf: TestUDF) extends TestCase with UDFTest with PgSQLTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + UDFPgSQLAnalyzerTestCase(newName, inputFile, newResultFile, udf) + } + + /** An date time test case with default timestamp as TimestampNTZType */ + protected case class TimestampNTZTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with TimestampNTZTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + TimestampNTZAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** A CTE test case with special handling */ + protected case class CTETestCase(name: String, inputFile: String, resultFile: String) + extends TestCase + with CTETest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + CTEAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** These are versions of the above test cases, but only exercising analysis. 
*/ + protected case class RegularAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest + protected case class AnsiAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest with AnsiTest + protected case class PgSQLAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest with PgSQLTest + protected case class UDFAnalyzerTestCase( + name: String, inputFile: String, resultFile: String, udf: TestUDF) + extends AnalyzerTest with UDFTest + protected case class UDTFSetAnalyzerTestCase( + name: String, inputFile: String, resultFile: String, udtfSet: TestUDTFSet) + extends AnalyzerTest with UDTFSetTest + protected case class UDAFAnalyzerTestCase( + name: String, inputFile: String, resultFile: String, udf: TestUDF) + extends AnalyzerTest with UDFTest + protected case class UDFPgSQLAnalyzerTestCase( + name: String, inputFile: String, resultFile: String, udf: TestUDF) + extends AnalyzerTest with UDFTest with PgSQLTest + protected case class TimestampNTZAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest with TimestampNTZTest + protected case class CTEAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest with CTETest + + /** A single SQL query's output. */ + trait QueryTestOutput { + def sql: String + def schema: Option[String] + def output: String + def numSegments: Int + } + + /** A single SQL query's execution output. */ + case class ExecutionOutput( + sql: String, + schema: Option[String], + output: String) extends QueryTestOutput { + override def toString: String = { + // We are explicitly not using multi-line string due to stripMargin removing "|" in output. + val schemaString = if (schema.nonEmpty) { + s"-- !query schema\n" + schema.get + "\n" + } else { + "" + } + s"-- !query\n" + + sql + "\n" + + schemaString + + s"-- !query output\n" + + output + } + + override def numSegments: Int = if (schema.isDefined) { 3 } else { 2 } + } + + /** A single SQL query's analysis results. */ + case class AnalyzerOutput( + sql: String, + schema: Option[String], + output: String) extends QueryTestOutput { + override def toString: String = { + // We are explicitly not using multi-line string due to stripMargin removing "|" in output. + s"-- !query\n" + + sql + "\n" + + s"-- !query analysis\n" + + output + } + override def numSegments: Int = 2 + } + + /** Returns all the files (not directories) in a directory, recursively. */ + protected def listFilesRecursively(path: File): Seq[File] = { + val (dirs, files) = path.listFiles().partition(_.isDirectory) + // Filter out test files with invalid extensions such as temp files created + // by vi (.swp), Mac (.DS_Store) etc. + val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions)) + (filteredFiles ++ dirs.flatMap(listFilesRecursively)).toImmutableArraySeq + } + + protected def splitCommentsAndCodes(input: String): (Array[String], Array[String]) = + input.split("\n").partition { line => + val newLine = line.trim + newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") + } + + protected def getQueries(code: Array[String], comments: Array[String], + allTestCases: Seq[TestCase]): Seq[String] = { + def splitWithSemicolon(seq: Seq[String]) = { + seq.mkString("\n").split("(?<=[^\\\\]);") + } + + // If `--IMPORT` found, load code from another test case file, then insert them + // into the head in this test. 
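Compared with the version removed from SQLQueryTestSuite further down, the ExecutionOutput moved in above makes the schema segment optional, so the same serializer can also describe query results for which no Spark schema was captured. A small usage sketch, assuming a suite that mixes in SQLQueryTestHelper:

// With a schema, toString produces the familiar three golden-file segments.
val withSchema = ExecutionOutput(
  sql = "SELECT 1",
  schema = Some("struct<1:int>"),
  output = "1")
// -- !query
// SELECT 1
// -- !query schema
// struct<1:int>
// -- !query output
// 1
assert(withSchema.numSegments == 3)

// Without a schema the "-- !query schema" block is omitted entirely.
val schemaless = ExecutionOutput(sql = "SELECT 1", schema = None, output = "1")
assert(schemaless.numSegments == 2)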
+ val importedTestCaseName = comments.filter(_.startsWith("--IMPORT ")).map(_.substring(9)) + val importedCode = importedTestCaseName.flatMap { testCaseName => + allTestCases.find(_.name == testCaseName).map { testCase => + val input = fileToString(new File(testCase.inputFile)) + val (_, code) = splitCommentsAndCodes(input) + code + } + }.flatten + + val allCode = importedCode ++ code + val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { + // Although the loop is heavy, only used for bracketed comments test. + val queries = new ArrayBuffer[String] + val otherCodes = new ArrayBuffer[String] + var tempStr = "" + var start = false + for (c <- allCode) { + if (c.trim.startsWith("--QUERY-DELIMITER-START")) { + start = true + queries ++= splitWithSemicolon(otherCodes.toSeq) + otherCodes.clear() + } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { + start = false + queries += s"\n${tempStr.stripSuffix(";")}" + tempStr = "" + } else if (start) { + tempStr += s"\n$c" + } else { + otherCodes += c + } + } + if (otherCodes.nonEmpty) { + queries ++= splitWithSemicolon(otherCodes.toSeq) + } + queries.toSeq + } else { + splitWithSemicolon(allCode.toImmutableArraySeq).toSeq + } + + // List of SQL queries to run + tempQueries.map(_.trim).filter(_ != "") + // Fix misplacement when comment is at the end of the query. + .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "") + } + + protected def getSparkSettings(comments: Array[String]): Array[(String, String)] = { + val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6)) + settingLines.flatMap(_.split(",").map { kv => + val (conf, value) = kv.span(_ != '=') + conf.trim -> value.substring(1).trim + }) + } + + protected def getSparkConfigDimensions(comments: Array[String]): Seq[Seq[(String, String)]] = { + // A config dimension has multiple config sets, and a config set has multiple configs. + // - config dim: Seq[Seq[(String, String)]] + // - config set: Seq[(String, String)] + // - config: (String, String)) + // We need to do cartesian product for all the config dimensions, to get a list of + // config sets, and run the query once for each config set. + val configDimLines = comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12)) + val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).view.mapValues { lines => + lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv => + val (conf, value) = kv.span(_ != '=') + conf.trim -> value.substring(1).trim + }.toSeq).toSeq + } + + configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) => + dim.flatMap { configSet => res.map(_ ++ configSet) } + } + } + + /** This is a helper function to normalize non-deterministic Python error stacktraces. 
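getSparkConfigDimensions expands the --CONFIG_DIM comments into the cartesian product of the per-dimension config sets, and the suite reruns every query once per resulting set. A worked example, using config keys that exist in Spark but are chosen here purely for illustration:

val comments = Array(
  "--CONFIG_DIM1 spark.sql.codegen.wholeStage=true",
  "--CONFIG_DIM1 spark.sql.codegen.wholeStage=false",
  "--CONFIG_DIM2 spark.sql.ansi.enabled=true")

// Two sets in dimension 1 crossed with one set in dimension 2 give two config
// sets; each query in the file runs once under each (the order of dimensions
// within a set is not guaranteed, since the dimensions are grouped into a Map):
//   Seq(("spark.sql.codegen.wholeStage", "true"),  ("spark.sql.ansi.enabled", "true"))
//   Seq(("spark.sql.codegen.wholeStage", "false"), ("spark.sql.ansi.enabled", "true"))
val configSets = getSparkConfigDimensions(comments)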
*/ + def normalizeTestResults(output: String): String = { + val strippedPythonErrors: String = { + var traceback = false + output.split("\n").filter { line: String => + if (line == "Traceback (most recent call last):") { + traceback = true + } else if (!line.startsWith(" ")) { + traceback = false + } + !traceback + }.mkString("\n") + } + strippedPythonErrors.replaceAll("\\s+$", "") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 9a78b7f52b74d..0edf06e1d7424 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -21,8 +21,6 @@ import java.io.File import java.net.URI import java.util.Locale -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.{SparkConf, TestUtils} import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.parser.ParseException @@ -171,151 +169,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper // Create all the test cases. listTestCases.foreach(createScalaTestCase) - /** A test case. */ - protected trait TestCase { - val name: String - val inputFile: String - val resultFile: String - def asAnalyzerTest(newName: String, newResultFile: String): TestCase - } - - /** - * traits that indicate UDF or PgSQL to trigger the code path specific to each. For instance, - * PgSQL tests require to register some UDF functions. - */ - protected trait PgSQLTest - - /** Trait that indicates ANSI-related tests with the ANSI mode enabled. */ - protected trait AnsiTest - - /** Trait that indicates an analyzer test that shows the analyzed plan string as output. */ - protected trait AnalyzerTest extends TestCase { - override def asAnalyzerTest(newName: String, newResultFile: String): AnalyzerTest = this - } - - /** Trait that indicates the default timestamp type is TimestampNTZType. */ - protected trait TimestampNTZTest - - /** Trait that indicates CTE test cases need their create view versions */ - protected trait CTETest - - protected trait UDFTest { - val udf: TestUDF - } - - protected trait UDTFSetTest { - val udtfSet: TestUDTFSet - } - - /** A regular test case. */ - protected case class RegularTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - RegularAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** An ANSI-related test case. */ - protected case class AnsiTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with AnsiTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - AnsiAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** An analyzer test that shows the analyzed plan string as output. */ - protected case class AnalyzerTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with AnalyzerTest - - /** A PostgreSQL test case. */ - protected case class PgSQLTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with PgSQLTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - PgSQLAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** A UDF test case. 
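The normalizeTestResults helper above keeps golden files stable when Python UDFs fail: it drops the "Traceback (most recent call last):" line together with the indented frames that follow it, and trims trailing whitespace. A small sketch of the effect (the error text is made up for illustration):

val raw =
  """org.apache.spark.api.python.PythonException
    |Traceback (most recent call last):
    |  File "udf.py", line 3, in <module>
    |    raise ValueError("boom")
    |ValueError: boom""".stripMargin

// normalizeTestResults(raw) keeps only the lines outside the traceback block:
//   org.apache.spark.api.python.PythonException
//   ValueError: boom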
*/ - protected case class UDFTestCase( - name: String, - inputFile: String, - resultFile: String, - udf: TestUDF) extends TestCase with UDFTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - UDFAnalyzerTestCase(newName, inputFile, newResultFile, udf) - } - - protected case class UDTFSetTestCase( - name: String, - inputFile: String, - resultFile: String, - udtfSet: TestUDTFSet) extends TestCase with UDTFSetTest { - - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - UDTFSetAnalyzerTestCase(newName, inputFile, newResultFile, udtfSet) - } - - /** A UDAF test case. */ - protected case class UDAFTestCase( - name: String, - inputFile: String, - resultFile: String, - udf: TestUDF) extends TestCase with UDFTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - UDAFAnalyzerTestCase(newName, inputFile, newResultFile, udf) - } - - /** A UDF PostgreSQL test case. */ - protected case class UDFPgSQLTestCase( - name: String, - inputFile: String, - resultFile: String, - udf: TestUDF) extends TestCase with UDFTest with PgSQLTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - UDFPgSQLAnalyzerTestCase(newName, inputFile, newResultFile, udf) - } - - /** An date time test case with default timestamp as TimestampNTZType */ - protected case class TimestampNTZTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with TimestampNTZTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - TimestampNTZAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** A CTE test case with special handling */ - protected case class CTETestCase(name: String, inputFile: String, resultFile: String) - extends TestCase - with CTETest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - CTEAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** These are versions of the above test cases, but only exercising analysis. 
*/ - protected case class RegularAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest - protected case class AnsiAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest with AnsiTest - protected case class PgSQLAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest with PgSQLTest - protected case class UDFAnalyzerTestCase( - name: String, inputFile: String, resultFile: String, udf: TestUDF) - extends AnalyzerTest with UDFTest - protected case class UDTFSetAnalyzerTestCase( - name: String, inputFile: String, resultFile: String, udtfSet: TestUDTFSet) - extends AnalyzerTest with UDTFSetTest - protected case class UDAFAnalyzerTestCase( - name: String, inputFile: String, resultFile: String, udf: TestUDF) - extends AnalyzerTest with UDFTest - protected case class UDFPgSQLAnalyzerTestCase( - name: String, inputFile: String, resultFile: String, udf: TestUDF) - extends AnalyzerTest with UDFTest with PgSQLTest - protected case class TimestampNTZAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest with TimestampNTZTest - protected case class CTEAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest with CTETest - protected def createScalaTestCase(testCase: TestCase): Unit = { if (ignoreList.exists(t => testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT)))) { @@ -349,91 +202,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } - protected def splitCommentsAndCodes(input: String) = - input.split("\n").partition { line => - val newLine = line.trim - newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") - } - - protected def getQueries(code: Array[String], comments: Array[String]) = { - def splitWithSemicolon(seq: Seq[String]) = { - seq.mkString("\n").split("(?<=[^\\\\]);") - } - - // If `--IMPORT` found, load code from another test case file, then insert them - // into the head in this test. - val importedTestCaseName = comments.filter(_.startsWith("--IMPORT ")).map(_.substring(9)) - val importedCode = importedTestCaseName.flatMap { testCaseName => - listTestCases.find(_.name == testCaseName).map { testCase => - val input = fileToString(new File(testCase.inputFile)) - val (_, code) = splitCommentsAndCodes(input) - code - } - }.flatten - - val allCode = importedCode ++ code - val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { - // Although the loop is heavy, only used for bracketed comments test. - val queries = new ArrayBuffer[String] - val otherCodes = new ArrayBuffer[String] - var tempStr = "" - var start = false - for (c <- allCode) { - if (c.trim.startsWith("--QUERY-DELIMITER-START")) { - start = true - queries ++= splitWithSemicolon(otherCodes.toSeq) - otherCodes.clear() - } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { - start = false - queries += s"\n${tempStr.stripSuffix(";")}" - tempStr = "" - } else if (start) { - tempStr += s"\n$c" - } else { - otherCodes += c - } - } - if (otherCodes.nonEmpty) { - queries ++= splitWithSemicolon(otherCodes.toSeq) - } - queries.toSeq - } else { - splitWithSemicolon(allCode.toImmutableArraySeq).toSeq - } - - // List of SQL queries to run - tempQueries.map(_.trim).filter(_ != "") - // Fix misplacement when comment is at the end of the query. 
- .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "") - } - - protected def getSparkSettings(comments: Array[String]): Array[(String, String)] = { - val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6)) - settingLines.flatMap(_.split(",").map { kv => - val (conf, value) = kv.span(_ != '=') - conf.trim -> value.substring(1).trim - }) - } - - protected def getSparkConfigDimensions(comments: Array[String]): Seq[Seq[(String, String)]] = { - // A config dimension has multiple config sets, and a config set has multiple configs. - // - config dim: Seq[Seq[(String, String)]] - // - config set: Seq[(String, String)] - // - config: (String, String)) - // We need to do cartesian product for all the config dimensions, to get a list of - // config sets, and run the query once for each config set. - val configDimLines = comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12)) - val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).view.mapValues { lines => - lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv => - val (conf, value) = kv.span(_ != '=') - conf.trim -> value.substring(1).trim - }.toSeq).toSeq - } - - configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) => - dim.flatMap { configSet => res.map(_ ++ configSet) } - } - } protected def runQueriesWithSparkConfigDimensions( queries: Seq[String], @@ -458,7 +226,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { val input = fileToString(new File(testCase.inputFile)) val (comments, code) = splitCommentsAndCodes(input) - val queries = getQueries(code, comments) + val queries = getQueries(code, comments, listTestCases) val settings = getSparkSettings(comments) if (regenerateGoldenFiles) { @@ -703,15 +471,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper }.sortBy(_.name) } - /** Returns all the files (not directories) in a directory, recursively. */ - protected def listFilesRecursively(path: File): Seq[File] = { - val (dirs, files) = path.listFiles().partition(_.isDirectory) - // Filter out test files with invalid extensions such as temp files created - // by vi (.swp), Mac (.DS_Store) etc. - val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions)) - (filteredFiles ++ dirs.flatMap(listFilesRecursively)).toImmutableArraySeq - } - /** Load built-in test tables into the SparkSession. */ protected def createTestTables(session: SparkSession): Unit = { import session.implicits._ @@ -905,62 +664,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } - /** This is a helper function to normalize non-deterministic Python error stacktraces. */ - def normalizeTestResults(output: String): String = { - val strippedPythonErrors: String = { - var traceback = false - output.split("\n").filter { line: String => - if (line == "Traceback (most recent call last):") { - traceback = true - } else if (!line.startsWith(" ")) { - traceback = false - } - !traceback - }.mkString("\n") - } - strippedPythonErrors.replaceAll("\\s+$", "") - } - - /** A single SQL query's output. */ - trait QueryTestOutput { - def sql: String - def schema: Option[String] - def output: String - def numSegments: Int - } - - /** A single SQL query's execution output. 
*/ - case class ExecutionOutput( - sql: String, - schema: Option[String], - output: String) extends QueryTestOutput { - override def toString: String = { - // We are explicitly not using multi-line string due to stripMargin removing "|" in output. - s"-- !query\n" + - sql + "\n" + - s"-- !query schema\n" + - schema.get + "\n" + - s"-- !query output\n" + - output - } - override def numSegments: Int = 3 - } - - /** A single SQL query's analysis results. */ - case class AnalyzerOutput( - sql: String, - schema: Option[String], - output: String) extends QueryTestOutput { - override def toString: String = { - // We are explicitly not using multi-line string due to stripMargin removing "|" in output. - s"-- !query\n" + - sql + "\n" + - s"-- !query analysis\n" + - output - } - override def numSegments: Int = 2 - } - test("test splitCommentsAndCodes") { { // Correctly split comments and codes
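The splitCommentsAndCodes test that begins here exercises the helper shown earlier: directive comments are separated from executable lines, while the --QUERY-DELIMITER markers are deliberately left on the code side so getQueries can later group the delimited region into a single query. A minimal sketch of the behavior under test:

val (comments, code) = splitCommentsAndCodes(
  """-- A sample input file
    |--SET spark.sql.ansi.enabled=true
    |--QUERY-DELIMITER-START
    |SELECT 1;
    |SELECT 2;
    |--QUERY-DELIMITER-END""".stripMargin)

// comments == Array("-- A sample input file", "--SET spark.sql.ansi.enabled=true")
// code still contains both delimiter markers plus the two SELECTs, so
// getQueries(code, comments, allTestCases) treats the delimited region as one query.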