[SPARK-49730][SQL] Classify syntax errors for pgsql, mysql, sqlserver and h2 #48182

Open · wants to merge 1 commit into base: master
15 changes: 15 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
@@ -1375,6 +1375,21 @@
"Check that the table <tableName> exists."
]
},
"GET_SCHEMA" : {
"message" : [
"Fetching schema for external query or table has failed."
]
},
"SYNTAX_ERROR" : {
"message" : [
"Query generated for external DBMS, during compile contains syntax error. Original query: <query>."
]
},
"EXECUTE_QUERY" : {
"message" : [
"Failed to execute jdbc query: <query>."
]
},
"UNCLASSIFIED" : {
"message" : [
"<message>"
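For context, a rough sketch of how a caller could react to the new condition once it is raised. The JDBC URL below is made up, `spark` is assumed to be an active SparkSession, and the accessors are the standard SparkThrowable ones (getErrorClass, getMessageParameters); this is illustrative only, not part of the change.

// Illustrative sketch only.
import org.apache.spark.SparkRuntimeException

try {
  spark.read.format("jdbc")
    .option("url", "jdbc:postgresql://localhost/test") // hypothetical URL
    .option("query", "SELECT * FRM tbl")               // intentional typo: FRM
    .load()
} catch {
  case e: SparkRuntimeException if e.getErrorClass == "FAILED_JDBC.SYNTAX_ERROR" =>
    // The generated query that the external DBMS rejected.
    val offendingQuery = e.getMessageParameters.get("query")
}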
@@ -22,12 +22,12 @@ import java.sql.{Connection, Date, Timestamp}
import java.time.LocalDateTime
import java.util.Properties

import org.apache.spark.SparkSQLException
import org.apache.spark.{SparkRuntimeException, SparkSQLException}
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{BinaryType, DecimalType}
import org.apache.spark.sql.types._
import org.apache.spark.tags.DockerTest

/**
@@ -158,6 +158,25 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite {
assert(types(1).equals("class java.lang.String"))
}

test("SPARK-49730: syntax error classification") {
checkError(
exception = intercept[SparkRuntimeException] {
val schema = StructType(
Seq(StructField("id", IntegerType, true)))

spark.read
.format("jdbc")
.schema(schema)
.option("url", jdbcUrl)
.option("query", "SELECT * FRM tbl")
.load()
},
condition = "FAILED_JDBC.SYNTAX_ERROR",
parameters = Map(
"url" -> jdbcUrl,
"query" -> "SELECT * FROM (SELECT * FRM tbl) SPARK_GEN_SUBQ_0 WHERE 1=0"))
}

test("Numeric types") {
Seq(true, false).foreach { flag =>
withSQLConf(SQLConf.LEGACY_MSSQLSERVER_NUMERIC_MAPPING_ENABLED.key -> s"$flag") {
@@ -24,10 +24,11 @@ import java.util.Properties

import scala.util.Using

import org.apache.spark.SparkRuntimeException
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.ShortType
import org.apache.spark.sql.types._
import org.apache.spark.tags.DockerTest

/**
@@ -119,6 +120,25 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite {
assert(types(1).equals("class java.lang.String"))
}

test("SPARK-49730: syntax error classification") {
checkError(
exception = intercept[SparkRuntimeException] {
val schema = StructType(
Seq(StructField("id", IntegerType, true)))

spark.read
.format("jdbc")
.schema(schema)
.option("url", jdbcUrl)
.option("query", "SELECT * FRM tbl")
.load()
},
condition = "FAILED_JDBC.SYNTAX_ERROR",
parameters = Map(
"url" -> jdbcUrl,
"query" -> "SELECT * FROM (SELECT * FRM tbl) SPARK_GEN_SUBQ_0 WHERE 1=0"))
}

test("Numeric types") {
val row = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties).head()
assert(row.length === 10)
@@ -23,7 +23,7 @@ import java.text.SimpleDateFormat
import java.time.LocalDateTime
import java.util.Properties

import org.apache.spark.SparkException
import org.apache.spark.{SparkException, SparkRuntimeException}
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.functions.lit
@@ -365,6 +365,25 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite {
assert(rows(0).getSeq(26) == Seq("10:20:10,14,15"))
}

test("SPARK-49730: syntax error classification") {
checkError(
exception = intercept[SparkRuntimeException] {
val schema = StructType(
Seq(StructField("id", IntegerType, true)))

spark.read
.format("jdbc")
.schema(schema)
.option("url", jdbcUrl)
.option("query", "SELECT * FRM tbl")
.load()
},
condition = "FAILED_JDBC.SYNTAX_ERROR",
parameters = Map(
"url" -> jdbcUrl,
"query" -> "SELECT * FROM (SELECT * FRM tbl) SPARK_GEN_SUBQ_0 WHERE 1=0"))
}

test("query JDBC option") {
val expectedResult = Set(
(42, 123456789012345L)
@@ -927,6 +927,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
"jdbcQueryString" -> jdbcQueryString))
}

def jdbcGeneratedQuerySyntaxError(url: String, query: String): Throwable = {
new SparkRuntimeException(
errorClass = "FAILED_JDBC.SYNTAX_ERROR",
messageParameters = Map(
"query" -> query,
"url" -> url
)
)
}

def missingJdbcTableNameAndQueryError(
jdbcTableName: String, jdbcQueryString: String): SparkIllegalArgumentException = {
new SparkIllegalArgumentException(
@@ -284,7 +284,16 @@ class JDBCRDD(
stmt.setQueryTimeout(options.queryTimeout)

val startTime = System.nanoTime
rs = stmt.executeQuery()
JdbcUtils.classifyException(
errorClass = "FAILED_JDBC.EXECUTE_QUERY",
messageParameters = Map(
"url" -> options.url,
"query" -> sqlText),
dialect,
description = s"Failed to execute the JDBC query: $sqlText"
) {
rs = stmt.executeQuery()
}
val endTime = System.nanoTime

val executionTime = endTime - startTime
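For readers unfamiliar with the wrapper used above, a paraphrased sketch of what JdbcUtils.classifyException does with these arguments, inferred from the call sites in this change rather than copied from the real implementation: it evaluates the block and, on failure, hands the exception plus the error class, message parameters and description to the dialect, which may rethrow a more specific error (as the dialect changes below do for syntax errors) or fall back to the supplied FAILED_JDBC sub-condition.

// Paraphrased sketch only; see JdbcUtils for the actual implementation.
def classifyException[T](
    errorClass: String,
    messageParameters: Map[String, String],
    dialect: JdbcDialect,
    description: String)(block: => T): T = {
  try {
    block
  } catch {
    case e: Throwable =>
      // The dialect inspects vendor error codes / SQLSTATEs and may throw a
      // classified error such as FAILED_JDBC.SYNTAX_ERROR; otherwise the
      // supplied errorClass and description drive the generic failure.
      throw dialect.classifyException(e, errorClass, messageParameters, description)
  }
}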
@@ -240,7 +240,17 @@ private[sql] object JDBCRelation extends Logging {
* @return resolved Catalyst schema of a JDBC table
*/
def getSchema(resolver: Resolver, jdbcOptions: JDBCOptions): StructType = {
val tableSchema = JDBCRDD.resolveTable(jdbcOptions)
val dialect = JdbcDialects.get(jdbcOptions.url)
val tableSchema = JdbcUtils.classifyException(
errorClass = "FAILED_JDBC.GET_SCHEMA",
messageParameters = Map(
"query" -> dialect.getSchemaQuery(jdbcOptions.tableOrQuery),
"url" -> jdbcOptions.url),
dialect = dialect,
description =
s"Failed to fetch schema for: ${dialect.getSchemaQuery(jdbcOptions.tableOrQuery)}") {
JDBCRDD.resolveTable(jdbcOptions)
}
jdbcOptions.customSchema match {
case Some(customSchema) => JdbcUtils.getCustomSchema(
tableSchema, customSchema, resolver)
@@ -34,6 +34,7 @@ import org.apache.spark.sql.connector.catalog.Identifier
import org.apache.spark.sql.connector.catalog.functions.UnboundFunction
import org.apache.spark.sql.connector.catalog.index.TableIndex
import org.apache.spark.sql.connector.expressions.{Expression, FieldReference, NamedReference}
import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils}
import org.apache.spark.sql.types.{BooleanType, ByteType, DataType, DecimalType, MetadataBuilder, ShortType, StringType, TimestampType}

@@ -240,6 +241,11 @@ private[sql] case class H2Dialect() extends JdbcDialect with NoLegacyJDBCError {
val indexName = messageParameters("indexName")
val tableName = messageParameters("tableName")
throw new NoSuchIndexException(indexName, tableName, cause = Some(e))
// SYNTAX_ERROR_1 (42000), SYNTAX_ERROR_2 (42001)
case 42000 | 42001 =>
throw QueryExecutionErrors.jdbcGeneratedQuerySyntaxError(
messageParameters.get("url").getOrElse(""),
messageParameters.get("query").getOrElse(""))
case _ => // do nothing
}
case _ => // do nothing
@@ -219,6 +219,10 @@ private case class MsSqlServerDialect() extends JdbcDialect with NoLegacyJDBCErr
case 15335 if errorClass == "FAILED_JDBC.RENAME_TABLE" =>
val newTable = messageParameters("newName")
throw QueryCompilationErrors.tableAlreadyExistsError(newTable)
case 102 | 122 | 142 | 148 | 156 | 319 | 336 => // SQL Server incorrect-syntax error numbers
throw QueryExecutionErrors.jdbcGeneratedQuerySyntaxError(
messageParameters.get("url").getOrElse(""),
messageParameters.get("query").getOrElse(""))
case _ => super.classifyException(e, errorClass, messageParameters, description)
}
case _ => super.classifyException(e, errorClass, messageParameters, description)
@@ -369,6 +369,10 @@ private case class MySQLDialect() extends JdbcDialect with SQLConfHelper with No
val indexName = messageParameters("indexName")
val tableName = messageParameters("tableName")
throw new NoSuchIndexException(indexName, tableName, cause = Some(e))
case 1064 => // ER_PARSE_ERROR
throw QueryExecutionErrors.jdbcGeneratedQuerySyntaxError(
messageParameters.get("url").getOrElse(""),
messageParameters.get("query").getOrElse(""))
case _ => super.classifyException(e, errorClass, messageParameters, description)
}
case unsupported: UnsupportedOperationException => throw unsupported
@@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NonE
import org.apache.spark.sql.connector.catalog.Identifier
import org.apache.spark.sql.connector.expressions.NamedReference
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils}
import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo
import org.apache.spark.sql.types._
@@ -291,6 +292,10 @@ private case class PostgresDialect()
namespace = messageParameters.get("namespace").toArray,
details = sqlException.getMessage,
cause = Some(e))
case "42601" =>
throw QueryExecutionErrors.jdbcGeneratedQuerySyntaxError(
messageParameters.get("url").getOrElse(""),
messageParameters.get("query").getOrElse(""))
case _ => super.classifyException(e, errorClass, messageParameters, description)
}
case unsupported: UnsupportedOperationException => throw unsupported
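Taken together, the dialects in this change recognise a syntax error in the generated statement as follows; a condensed, illustrative sketch of the matching above (the real logic lives in each dialect's classifyException):

// Condensed sketch of the per-dialect matching; illustrative only.
import java.sql.SQLException

def isGeneratedQuerySyntaxError(e: SQLException, dialect: String): Boolean = dialect match {
  case "h2"       => e.getErrorCode == 42000 || e.getErrorCode == 42001 // SYNTAX_ERROR_1 / SYNTAX_ERROR_2
  case "mssql"    => Set(102, 122, 142, 148, 156, 319, 336).contains(e.getErrorCode)
  case "mysql"    => e.getErrorCode == 1064                             // ER_PARSE_ERROR
  case "postgres" => e.getSQLState == "42601"                           // SQLSTATE syntax_error
  case _          => false
}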
22 changes: 20 additions & 2 deletions sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -29,7 +29,7 @@ import scala.util.Random
import org.mockito.ArgumentMatchers._
import org.mockito.Mockito._

import org.apache.spark.{SparkException, SparkSQLException}
import org.apache.spark.{SparkException, SparkRuntimeException, SparkSQLException}
import org.apache.spark.sql.{AnalysisException, DataFrame, Observation, QueryTest, Row}
import org.apache.spark.sql.catalyst.{analysis, TableIdentifier}
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
@@ -1515,6 +1515,25 @@ class JDBCSuite extends QueryTest with SharedSparkSession {
assert(res === (foobarCnt, 0L, foobarCnt) :: Nil)
}

test("SPARK-49730: syntax error classification") {
checkError(
exception = intercept[SparkRuntimeException] {
val schema = StructType(
Seq(StructField("id", IntegerType, true, defaultMetadata(IntegerType))))

spark.read
.format("jdbc")
.schema(schema)
.option("url", urlWithUserAndPass)
.option("query", "SELECT * FRM tbl")
.load()
},
condition = "FAILED_JDBC.SYNTAX_ERROR",
parameters = Map(
"url" -> urlWithUserAndPass,
"query" -> "SELECT * FROM (SELECT * FRM tbl) SPARK_GEN_SUBQ_0 WHERE 1=0"))
}

test("unsupported types") {
checkError(
exception = intercept[SparkSQLException] {
@@ -1524,7 +1543,6 @@ class JDBCSuite extends QueryTest with SharedSparkSession {
parameters = Map("typeName" -> "INTEGER ARRAY", "jdbcType" -> "ARRAY"))
}


test("SPARK-47394: Convert TIMESTAMP WITH TIME ZONE to TimestampType") {
Seq(true, false).foreach { prefer =>
val df = spark.read