Skip to content

Commit

Permalink
[SPARK-49611][SQL][FOLLOW-UP] Fix wrong results of collations() TVF
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Fix the swapped values of the accent-sensitivity and case-sensitivity columns returned by the collations() TVF.

### Why are the changes needed?
When the initial PR was introduced, the ICU collation listing generated these columns in a different order, so the results were wrong.

### Does this PR introduce _any_ user-facing change?
No, as Spark 4.0 has not been released yet.

### How was this patch tested?
The existing test in CollationSuite.scala, whose expected values were wrong in the first place and have been corrected.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #48152 from mihailom-db/tvf-collations-followup.

Authored-by: Mihailo Milosevic <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
  • Loading branch information
mihailom-db authored and MaxGekk committed Sep 19, 2024
1 parent f3c8d26 commit 3bdf146
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -773,8 +773,8 @@ protected CollationMeta buildCollationMeta() {
ICULocaleMap.get(locale).getDisplayCountry(),
VersionInfo.ICU_VERSION.toString(),
COLLATION_PAD_ATTRIBUTE,
caseSensitivity == CaseSensitivity.CS,
accentSensitivity == AccentSensitivity.AS);
accentSensitivity == AccentSensitivity.AS,
caseSensitivity == CaseSensitivity.CS);
}

/**
Expand Down
24 changes: 12 additions & 12 deletions sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1661,17 +1661,17 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
Row("SYSTEM", "BUILTIN", "UNICODE", "", "",
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "UNICODE_AI", "", "",
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "UNICODE_CI", "", "",
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "UNICODE_CI", "", "",
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "UNICODE_CI_AI", "", "",
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "af", "Afrikaans", "",
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "af_AI", "Afrikaans", "",
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "af_CI", "Afrikaans", "",
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "af_CI", "Afrikaans", "",
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "af_CI_AI", "Afrikaans", "",
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))

Expand All @@ -1683,19 +1683,19 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
Seq(Row("SYSTEM", "BUILTIN", "zh_Hant_HKG", "Chinese", "Hong Kong SAR China",
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_AI", "Chinese", "Hong Kong SAR China",
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI", "Chinese", "Hong Kong SAR China",
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI", "Chinese", "Hong Kong SAR China",
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI_AI", "Chinese", "Hong Kong SAR China",
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))

checkAnswer(sql("SELECT * FROM collations() WHERE COUNTRY = 'Singapore'"),
Seq(Row("SYSTEM", "BUILTIN", "zh_Hans_SGP", "Chinese", "Singapore",
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "zh_Hans_SGP_AI", "Chinese", "Singapore",
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "zh_Hans_SGP_CI", "Chinese", "Singapore",
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "zh_Hans_SGP_CI", "Chinese", "Singapore",
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "zh_Hans_SGP_CI_AI", "Chinese", "Singapore",
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))

Expand All @@ -1704,17 +1704,17 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
Seq(Row("SYSTEM", "BUILTIN", "en_USA", "English", "United States",
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "en_USA_AI", "English", "United States",
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "en_USA_CI", "English", "United States",
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "en_USA_CI", "English", "United States",
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
Row("SYSTEM", "BUILTIN", "en_USA_CI_AI", "English", "United States",
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))

checkAnswer(sql("SELECT NAME, LANGUAGE, ACCENT_SENSITIVITY, CASE_SENSITIVITY " +
"FROM collations() WHERE COUNTRY = 'United States'"),
Seq(Row("en_USA", "English", "ACCENT_SENSITIVE", "CASE_SENSITIVE"),
Row("en_USA_AI", "English", "ACCENT_SENSITIVE", "CASE_INSENSITIVE"),
Row("en_USA_CI", "English", "ACCENT_INSENSITIVE", "CASE_SENSITIVE"),
Row("en_USA_AI", "English", "ACCENT_INSENSITIVE", "CASE_SENSITIVE"),
Row("en_USA_CI", "English", "ACCENT_SENSITIVE", "CASE_INSENSITIVE"),
Row("en_USA_CI_AI", "English", "ACCENT_INSENSITIVE", "CASE_INSENSITIVE")))

checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"),
Expand Down

0 comments on commit 3bdf146

Please sign in to comment.