Skip to content

Commit

Permalink
[SPARK-49538][SQL][TESTS] Detect unused error message parameters
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
In the PR, I propose to detect unused error message parameters while substituting placeholders in error message formats, and raise an internal error in tests when the number of parameters is greater than the number of placeholders. That might indicate the presence of unused message parameters.

### Why are the changes needed?
It might happen that the passed error message parameters and the placeholders in the message format do not match, and contain extra items. From the code maintainability perspective, it would be nice to detect such cases while running tests.

For example, the error message format could look like:

```json
"CANNOT_UP_CAST_DATATYPE" : {
  "message" : [
    "Cannot up cast <expression> from <sourceType> to <targetType>.",
    "<details>"
    ],
  "sqlState" : "42846"
},
```

but the passed message parameters have extra parameter:

```scala
messageParameters = Map(
  "expression" -> "CAST('aaa' AS LONG)",
  "sourceType" -> "STRING",
  "targetType" -> "LONG",
  "op" -> "CAST", // unused parameter
  "details" -> "implicit cast"
)
```

This can happen because:
- a tech editor/dev forgot to mention some parameters in the error message format,
- wrong usage of error conditions/sub-conditions,
- the source code became outdated,
- typos in error message formats.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
By running the added test via:
```
$ build/mvn -Dtest=none -DwildcardSuites=org.apache.spark.SparkThrowableSuite test
```

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #48026 from MaxGekk/detect-unused-error-params.

Authored-by: Max Gekk <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
  • Loading branch information
MaxGekk committed Sep 10, 2024
1 parent f5d1f35 commit 5f2d839
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 24 deletions.
6 changes: 3 additions & 3 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,7 @@
},
"NON_STRING_TYPE" : {
"message" : [
"all arguments must be strings."
"all arguments of the function <funcName> must be strings."
]
},
"NULL_TYPE" : {
Expand Down Expand Up @@ -6222,7 +6222,7 @@
"Detected implicit cartesian product for <joinType> join between logical plans",
"<leftPlan>",
"and",
"rightPlan",
"<rightPlan>",
"Join condition is missing or trivial.",
"Either: use the CROSS JOIN syntax to allow cartesian products between these relations, or: enable implicit cartesian products by setting the configuration variable spark.sql.crossJoin.enabled=true."
]
Expand Down Expand Up @@ -7827,7 +7827,7 @@
},
"_LEGACY_ERROR_TEMP_3055" : {
"message" : [
"ScalarFunction '<scalarFunc.name>' neither implement magic method nor override 'produceResult'"
"ScalarFunction <scalarFunc> neither implement magic method nor override 'produceResult'"
]
},
"_LEGACY_ERROR_TEMP_3056" : {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package org.apache.spark

import java.net.URL

import scala.collection.immutable.Map
import scala.jdk.CollectionConverters._

import com.fasterxml.jackson.annotation.JsonIgnore
Expand Down Expand Up @@ -52,7 +51,7 @@ class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) {
val sub = new StringSubstitutor(sanitizedParameters.asJava)
sub.setEnableUndefinedVariableException(true)
sub.setDisableSubstitutionInValues(true)
try {
val errorMessage = try {
sub.replace(ErrorClassesJsonReader.TEMPLATE_REGEX.replaceAllIn(
messageTemplate, "\\$\\{$1\\}"))
} catch {
Expand All @@ -61,6 +60,17 @@ class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) {
s"MessageTemplate: $messageTemplate, " +
s"Parameters: $messageParameters", i)
}
if (util.SparkEnvUtils.isTesting) {
val placeHoldersNum = ErrorClassesJsonReader.TEMPLATE_REGEX.findAllIn(messageTemplate).length
if (placeHoldersNum < sanitizedParameters.size) {
throw SparkException.internalError(
s"Found unused message parameters of the error class '$errorClass'. " +
s"Its error message format has $placeHoldersNum placeholders, " +
s"but the passed message parameters map has ${sanitizedParameters.size} items. " +
"Consider to add placeholders to the error format or remove unused message parameters.")
}
}
errorMessage
}

def getMessageParameters(errorClass: String): Seq[String] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,6 @@ class ClientStreamingQuerySuite extends QueryTest with RemoteSparkSession with L
assert(exception.getErrorClass != null)
assert(exception.getMessageParameters().get("id") == query.id.toString)
assert(exception.getMessageParameters().get("runId") == query.runId.toString)
assert(!exception.getMessageParameters().get("startOffset").isEmpty)
assert(!exception.getMessageParameters().get("endOffset").isEmpty)
assert(exception.getCause.isInstanceOf[SparkException])
assert(exception.getCause.getCause.isInstanceOf[SparkException])
assert(
Expand Down Expand Up @@ -374,8 +372,6 @@ class ClientStreamingQuerySuite extends QueryTest with RemoteSparkSession with L
assert(exception.getErrorClass != null)
assert(exception.getMessageParameters().get("id") == query.id.toString)
assert(exception.getMessageParameters().get("runId") == query.runId.toString)
assert(!exception.getMessageParameters().get("startOffset").isEmpty)
assert(!exception.getMessageParameters().get("endOffset").isEmpty)
assert(exception.getCause.isInstanceOf[SparkException])
assert(exception.getCause.getCause.isInstanceOf[SparkException])
assert(
Expand Down
33 changes: 25 additions & 8 deletions core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -207,13 +207,6 @@ class SparkThrowableSuite extends SparkFunSuite {
}
assert(e.getErrorClass === "INTERNAL_ERROR")
assert(e.getMessageParameters().get("message").contains("Undefined error message parameter"))

// Does not fail with too many args (expects 0 args)
assert(getMessage("DIVIDE_BY_ZERO", Map("config" -> "foo", "a" -> "bar")) ==
"[DIVIDE_BY_ZERO] Division by zero. " +
"Use `try_divide` to tolerate divisor being 0 and return NULL instead. " +
"If necessary set foo to \"false\" " +
"to bypass this error. SQLSTATE: 22012")
}

test("Error message is formatted") {
Expand Down Expand Up @@ -504,7 +497,7 @@ class SparkThrowableSuite extends SparkFunSuite {
|{
| "MISSING_PARAMETER" : {
| "message" : [
| "Parameter ${param} is missing."
| "Parameter <param> is missing."
| ]
| }
|}
Expand All @@ -517,4 +510,28 @@ class SparkThrowableSuite extends SparkFunSuite {
assert(errorMessage.contains("Parameter null is missing."))
}
}

// Verifies the new test-only check added in SPARK-49538: when the message
// parameters map contains more entries than the error message format has
// placeholders, getMessage must raise an INTERNAL_ERROR (instead of silently
// ignoring the extra parameters).
test("detect unused message parameters") {
  checkError(
    exception = intercept[SparkException] {
      SparkThrowableHelper.getMessage(
        // CANNOT_UP_CAST_DATATYPE declares exactly 4 placeholders:
        // <expression>, <sourceType>, <targetType>, <details>.
        errorClass = "CANNOT_UP_CAST_DATATYPE",
        messageParameters = Map(
          "expression" -> "CAST('aaa' AS LONG)",
          "sourceType" -> "STRING",
          "targetType" -> "LONG",
          "op" -> "CAST", // unused parameter
          "details" -> "implicit cast"
        ))
    },
    errorClass = "INTERNAL_ERROR",
    // The internal error message must name the offending error class and report
    // both counts (4 placeholders vs. 5 supplied parameters).
    parameters = Map(
      "message" ->
        ("Found unused message parameters of the error class 'CANNOT_UP_CAST_DATATYPE'. " +
        "Its error message format has 4 placeholders, but the passed message parameters map " +
        "has 5 items. Consider to add placeholders to the error format or " +
        "remove unused message parameters.")
    )
  )
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -371,13 +371,7 @@ abstract class StreamExecution(
messageParameters = Map(
"id" -> id.toString,
"runId" -> runId.toString,
"message" -> message,
"queryDebugString" -> toDebugString(includeLogicalPlan = isInitialized),
"startOffset" -> getLatestExecutionContext().startOffsets.toOffsetSeq(
sources.toSeq, getLatestExecutionContext().offsetSeqMetadata).toString,
"endOffset" -> getLatestExecutionContext().endOffsets.toOffsetSeq(
sources.toSeq, getLatestExecutionContext().offsetSeqMetadata).toString
))
"message" -> message))

errorClassOpt = e match {
case t: SparkThrowable => Option(t.getErrorClass)
Expand Down

0 comments on commit 5f2d839

Please sign in to comment.