Skip to content

Commit e72fd6b

Browse files
ilicmarkodb authored and summaryzb committed
[SPARK-51646][SQL] Fix propagating collation in views with default collation
### What changes were proposed in this pull request?

Fixed the propagation of the default collation to literals, subqueries, etc., in `CREATE VIEW ... DEFAULT COLLATION ...` queries. The issue was that the saved string used to construct the view did not include the `DEFAULT COLLATION ...` clause, resulting in the view being created without collation information.

### Why are the changes needed?

Bug fix.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Tests added to `DefaultCollationTestSuite`.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#50436 from ilicmarkodb/fix_subquery_literals_in_views_with_default_collation.

Authored-by: ilicmarkodb <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
1 parent c5316ae commit e72fd6b

File tree

3 files changed

+86
-3
lines changed

3 files changed

+86
-3
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,7 @@ case class AnalysisContext(
158158
referredTempVariableNames: Seq[Seq[String]] = Seq.empty,
159159
outerPlan: Option[LogicalPlan] = None,
160160
isExecuteImmediate: Boolean = false,
161+
collation: Option[String] = None,
161162

162163
/**
163164
* This is a bridge state between this fixed-point [[Analyzer]] and a single-pass [[Resolver]].
@@ -213,7 +214,8 @@ object AnalysisContext {
213214
viewDesc.viewReferredTempViewNames,
214215
mutable.Set(viewDesc.viewReferredTempFunctionNames: _*),
215216
viewDesc.viewReferredTempVariableNames,
216-
isExecuteImmediate = originContext.isExecuteImmediate)
217+
isExecuteImmediate = originContext.isExecuteImmediate,
218+
collation = viewDesc.collation)
217219
set(context)
218220
try f finally { set(originContext) }
219221
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717

1818
package org.apache.spark.sql.catalyst.analysis
1919

20-
import org.apache.spark.sql.catalyst.expressions.{Cast, DefaultStringProducingExpression, Expression, Literal}
20+
import org.apache.spark.sql.catalyst.expressions.{Cast, DefaultStringProducingExpression, Expression, Literal, SubqueryExpression}
2121
import org.apache.spark.sql.catalyst.plans.logical.{AddColumns, AlterColumns, AlterColumnSpec, AlterTableCommand, AlterViewAs, ColumnDefinition, CreateTable, CreateView, LogicalPlan, QualifiedColType, ReplaceColumns, ReplaceTable, V2CreateTablePlan}
2222
import org.apache.spark.sql.catalyst.rules.Rule
2323
import org.apache.spark.sql.connector.catalog.TableCatalog
@@ -66,6 +66,10 @@ object ResolveDDLCommandStringTypes extends Rule[LogicalPlan] {
6666
StringType(defaultCollation)
6767
}
6868

69+
// Check if view has default collation
70+
case _ if AnalysisContext.get.collation.isDefined =>
71+
StringType(AnalysisContext.get.collation.get)
72+
6973
case _ => StringType(defaultCollation)
7074
}
7175
}
@@ -79,6 +83,8 @@ object ResolveDDLCommandStringTypes extends Rule[LogicalPlan] {
7983
// For CREATE TABLE, only v2 CREATE TABLE command is supported.
8084
// Also, table DEFAULT COLLATION cannot be specified through CREATE TABLE AS SELECT command.
8185
case _: V2CreateTablePlan | _: ReplaceTable | _: CreateView | _: AlterViewAs => true
86+
// Check if view has default collation
87+
case _ if AnalysisContext.get.collation.isDefined => true
8288
case _ => false
8389
}
8490

@@ -126,11 +132,22 @@ object ResolveDDLCommandStringTypes extends Rule[LogicalPlan] {
126132
case columnDef: ColumnDefinition if hasDefaultStringType(columnDef.dataType) =>
127133
newType => columnDef.copy(dataType = replaceDefaultStringType(columnDef.dataType, newType))
128134

129-
case cast: Cast if hasDefaultStringType(cast.dataType) =>
135+
case cast: Cast if hasDefaultStringType(cast.dataType) &&
136+
cast.getTagValue(Cast.USER_SPECIFIED_CAST).isDefined =>
130137
newType => cast.copy(dataType = replaceDefaultStringType(cast.dataType, newType))
131138

132139
case Literal(value, dt) if hasDefaultStringType(dt) =>
133140
newType => Literal(value, replaceDefaultStringType(dt, newType))
141+
142+
case subquery: SubqueryExpression =>
143+
val plan = subquery.plan
144+
newType =>
145+
val newPlan = plan resolveExpressionsUp { expression =>
146+
transformExpression
147+
.andThen(_.apply(newType))
148+
.applyOrElse(expression, identity[Expression])
149+
}
150+
subquery.withNewPlan(newPlan)
134151
}
135152

136153
/**

sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -410,6 +410,70 @@ class DefaultCollationTestSuiteV1 extends DefaultCollationTestSuite {
410410
// scalastyle:on
411411
}
412412
}
413+
withView(testView) {
414+
// scalastyle:off
415+
sql(
416+
s"""CREATE OR REPLACE VIEW $testView
417+
| (c1)
418+
| DEFAULT COLLATION sr_ai
419+
| AS SELECT 'Ć' as c1 WHERE 'Ć' = 'C'
420+
|""".stripMargin)
421+
checkAnswer(sql(s"SELECT COUNT(*) FROM $testView WHERE c1 = 'Č'"), Row(1))
422+
// scalastyle:on
423+
}
424+
}
425+
426+
test("CREATE VIEW with DEFAULT COLLATION") {
427+
withView(testView) {
428+
sql(
429+
s"""CREATE VIEW $testView DEFAULT COLLATION UTF8_LCASE
430+
| as SELECT 'a' as c1
431+
|""".stripMargin)
432+
checkAnswer(sql(s"SELECT COUNT(*) FROM $testView WHERE c1 = 'A'"), Seq(Row(1)))
433+
}
434+
withTable(testTable) {
435+
sql(s"CREATE TABLE $testTable (c1 STRING COLLATE UTF8_LCASE)")
436+
sql(s"INSERT INTO $testTable VALUES ('a'), ('A')")
437+
withView(testView) {
438+
withSQLConf() {
439+
// scalastyle:off
440+
sql(
441+
s"""CREATE VIEW $testView DEFAULT COLLATION SR_AI_CI
442+
| AS SELECT c1 FROM $testTable
443+
| WHERE 'ć' = 'č'
444+
|""".stripMargin)
445+
// scalastyle:on
446+
checkAnswer(sql(s"SELECT COUNT(*) FROM $testView"), Seq(Row(2)))
447+
checkAnswer(sql(s"SELECT COUNT(*) FROM $testView WHERE c1 = 'A'"), Seq(Row(2)))
448+
}
449+
}
450+
}
451+
withTable(testTable) {
452+
sql(s"CREATE TABLE $testTable (c1 STRING COLLATE UTF8_LCASE)")
453+
// scalastyle:off
454+
sql(s"INSERT INTO $testTable VALUES ('ć'), ('č')")
455+
// scalastyle:on
456+
withView(testView) {
457+
sql(
458+
s"""CREATE VIEW $testView DEFAULT COLLATION UNICODE
459+
| AS SELECT CAST(c1 AS STRING COLLATE SR_AI) FROM $testTable
460+
|""".stripMargin)
461+
val prefix = "SYSTEM.BUILTIN"
462+
checkAnswer(sql(s"SELECT DISTINCT COLLATION(c1) FROM $testView"), Row(s"$prefix.sr_AI"))
463+
checkAnswer(sql(s"SELECT COUNT(*) FROM $testView WHERE c1 = 'c'"), Row(2))
464+
}
465+
}
466+
withView(testView) {
467+
sql(
468+
s"""CREATE VIEW $testView DEFAULT COLLATION UTF8_LCASE
469+
| AS SELECT 'a' AS c1,
470+
| (SELECT (SELECT CASE 'a' = 'A' WHEN TRUE THEN 'a' ELSE 'b' END)
471+
| WHERE (SELECT 'b' WHERE 'c' = 'C') = 'B') AS c2
472+
|""".stripMargin)
473+
checkAnswer(sql(s"SELECT COUNT(*) FROM $testView WHERE c1 = 'A'"), Seq(Row(1)))
474+
checkAnswer(sql(s"SELECT COUNT(*) FROM $testView WHERE c2 = 'a'"), Seq(Row(1)))
475+
checkAnswer(sql(s"SELECT COUNT(*) FROM $testView WHERE c2 = 'b'"), Seq(Row(0)))
476+
}
413477
}
414478
}
415479

0 commit comments

Comments
 (0)