Skip to content

Commit 3bdf146

Browse files
mihailom-db authored and
MaxGekk committed
[SPARK-49611][SQL][FOLLOW-UP] Fix wrong results of collations() TVF
### What changes were proposed in this pull request? Fix the accent-sensitivity and case-sensitivity column results of the `collations()` TVF. ### Why are the changes needed? When the initial PR was introduced, the ICU collation listing generated the accent-sensitivity and case-sensitivity columns in swapped order, so the results were wrong. ### Does this PR introduce _any_ user-facing change? No, as Spark 4.0 has not been released yet. ### How was this patch tested? With the existing test in CollationSuite.scala, whose expected values were wrong in the first place and are corrected here. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48152 from mihailom-db/tvf-collations-followup. Authored-by: Mihailo Milosevic <[email protected]> Signed-off-by: Max Gekk <[email protected]>
1 parent f3c8d26 commit 3bdf146

File tree

2 files changed

+14
-14
lines changed

2 files changed

+14
-14
lines changed

common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -773,8 +773,8 @@ protected CollationMeta buildCollationMeta() {
773773
ICULocaleMap.get(locale).getDisplayCountry(),
774774
VersionInfo.ICU_VERSION.toString(),
775775
COLLATION_PAD_ATTRIBUTE,
776-
caseSensitivity == CaseSensitivity.CS,
777-
accentSensitivity == AccentSensitivity.AS);
776+
accentSensitivity == AccentSensitivity.AS,
777+
caseSensitivity == CaseSensitivity.CS);
778778
}
779779

780780
/**

sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1661,17 +1661,17 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
16611661
Row("SYSTEM", "BUILTIN", "UNICODE", "", "",
16621662
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
16631663
Row("SYSTEM", "BUILTIN", "UNICODE_AI", "", "",
1664-
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
1665-
Row("SYSTEM", "BUILTIN", "UNICODE_CI", "", "",
16661664
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
1665+
Row("SYSTEM", "BUILTIN", "UNICODE_CI", "", "",
1666+
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
16671667
Row("SYSTEM", "BUILTIN", "UNICODE_CI_AI", "", "",
16681668
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
16691669
Row("SYSTEM", "BUILTIN", "af", "Afrikaans", "",
16701670
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
16711671
Row("SYSTEM", "BUILTIN", "af_AI", "Afrikaans", "",
1672-
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
1673-
Row("SYSTEM", "BUILTIN", "af_CI", "Afrikaans", "",
16741672
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
1673+
Row("SYSTEM", "BUILTIN", "af_CI", "Afrikaans", "",
1674+
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
16751675
Row("SYSTEM", "BUILTIN", "af_CI_AI", "Afrikaans", "",
16761676
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))
16771677

@@ -1683,19 +1683,19 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
16831683
Seq(Row("SYSTEM", "BUILTIN", "zh_Hant_HKG", "Chinese", "Hong Kong SAR China",
16841684
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
16851685
Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_AI", "Chinese", "Hong Kong SAR China",
1686-
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
1687-
Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI", "Chinese", "Hong Kong SAR China",
16881686
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
1687+
Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI", "Chinese", "Hong Kong SAR China",
1688+
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
16891689
Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI_AI", "Chinese", "Hong Kong SAR China",
16901690
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))
16911691

16921692
checkAnswer(sql("SELECT * FROM collations() WHERE COUNTRY = 'Singapore'"),
16931693
Seq(Row("SYSTEM", "BUILTIN", "zh_Hans_SGP", "Chinese", "Singapore",
16941694
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
16951695
Row("SYSTEM", "BUILTIN", "zh_Hans_SGP_AI", "Chinese", "Singapore",
1696-
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
1697-
Row("SYSTEM", "BUILTIN", "zh_Hans_SGP_CI", "Chinese", "Singapore",
16981696
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
1697+
Row("SYSTEM", "BUILTIN", "zh_Hans_SGP_CI", "Chinese", "Singapore",
1698+
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
16991699
Row("SYSTEM", "BUILTIN", "zh_Hans_SGP_CI_AI", "Chinese", "Singapore",
17001700
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))
17011701

@@ -1704,17 +1704,17 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
17041704
Seq(Row("SYSTEM", "BUILTIN", "en_USA", "English", "United States",
17051705
"ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
17061706
Row("SYSTEM", "BUILTIN", "en_USA_AI", "English", "United States",
1707-
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
1708-
Row("SYSTEM", "BUILTIN", "en_USA_CI", "English", "United States",
17091707
"ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"),
1708+
Row("SYSTEM", "BUILTIN", "en_USA_CI", "English", "United States",
1709+
"ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"),
17101710
Row("SYSTEM", "BUILTIN", "en_USA_CI_AI", "English", "United States",
17111711
"ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0")))
17121712

17131713
checkAnswer(sql("SELECT NAME, LANGUAGE, ACCENT_SENSITIVITY, CASE_SENSITIVITY " +
17141714
"FROM collations() WHERE COUNTRY = 'United States'"),
17151715
Seq(Row("en_USA", "English", "ACCENT_SENSITIVE", "CASE_SENSITIVE"),
1716-
Row("en_USA_AI", "English", "ACCENT_SENSITIVE", "CASE_INSENSITIVE"),
1717-
Row("en_USA_CI", "English", "ACCENT_INSENSITIVE", "CASE_SENSITIVE"),
1716+
Row("en_USA_AI", "English", "ACCENT_INSENSITIVE", "CASE_SENSITIVE"),
1717+
Row("en_USA_CI", "English", "ACCENT_SENSITIVE", "CASE_INSENSITIVE"),
17181718
Row("en_USA_CI_AI", "English", "ACCENT_INSENSITIVE", "CASE_INSENSITIVE")))
17191719

17201720
checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"),

0 commit comments

Comments
 (0)