Skip to content

Commit 18cc4bb

Browse files
committed
Support altering multiple columns in the same command
1 parent 3ef529b commit 18cc4bb

File tree

13 files changed

+531
-289
lines changed

13 files changed

+531
-289
lines changed

common/utils/src/main/resources/error/error-conditions.json

+12
Original file line numberDiff line numberDiff line change
@@ -3938,6 +3938,18 @@
39383938
],
39393939
"sqlState" : "0A000"
39403940
},
3941+
"NOT_SUPPORTED_CHANGE_SAME_COLUMN" : {
3942+
"message" : [
3943+
"ALTER TABLE ALTER/CHANGE COLUMN is not supported for changing <table>'s column <fieldName> multiple times in the same command."
3944+
],
3945+
"sqlState" : "0A000"
3946+
},
3947+
"NOT_SUPPORTED_CHANGE_PARENT_CHILD_COLUMN" : {
3948+
"message" : [
3949+
"ALTER TABLE ALTER/CHANGE COLUMN is not supported for changing <table>'s parent field <parentField> and child field <childField> in the same command."
3950+
],
3951+
"sqlState" : "0A000"
3952+
},
39413953
"NOT_SUPPORTED_COMMAND_FOR_V2_TABLE" : {
39423954
"message" : [
39433955
"<cmd> is not supported for v2 tables."

sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4

+9-2
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,7 @@ statement
212212
| ALTER (TABLE | VIEW) identifierReference
213213
UNSET TBLPROPERTIES (IF EXISTS)? propertyList #unsetTableProperties
214214
| ALTER TABLE table=identifierReference
215-
(ALTER | CHANGE) COLUMN? column=multipartIdentifier
216-
alterColumnAction? #alterTableAlterColumn
215+
(ALTER | CHANGE) COLUMN? columns=alterColumnSpecList #alterTableAlterColumn
217216
| ALTER TABLE table=identifierReference partitionSpec?
218217
CHANGE COLUMN?
219218
colName=multipartIdentifier colType colPosition? #hiveChangeColumn
@@ -1489,6 +1488,14 @@ number
14891488
| MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral
14901489
;
14911490

1491+
alterColumnSpecList
1492+
: alterColumnSpec (COMMA alterColumnSpec)*
1493+
;
1494+
1495+
alterColumnSpec
1496+
: column=multipartIdentifier alterColumnAction?
1497+
;
1498+
14921499
alterColumnAction
14931500
: TYPE dataType
14941501
| commentSpec

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

+24-19
Original file line numberDiff line numberDiff line change
@@ -3847,26 +3847,31 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
38473847
resolved.copyTagsFrom(a)
38483848
resolved
38493849

3850-
case a @ AlterColumn(
3851-
table: ResolvedTable, ResolvedFieldName(path, field), dataType, _, _, position, _) =>
3852-
val newDataType = dataType.flatMap { dt =>
3853-
// Hive style syntax provides the column type, even if it may not have changed.
3854-
val existing = CharVarcharUtils.getRawType(field.metadata).getOrElse(field.dataType)
3855-
if (existing == dt) None else Some(dt)
3856-
}
3857-
val newPosition = position map {
3858-
case u @ UnresolvedFieldPosition(after: After) =>
3859-
// TODO: since the field name is already resolved, it's more efficient if
3860-
// `ResolvedFieldName` carries the parent struct and we resolve column position
3861-
// based on the parent struct, instead of re-resolving the entire column path.
3862-
val resolved = resolveFieldNames(table, path :+ after.column(), u)
3863-
ResolvedFieldPosition(ColumnPosition.after(resolved.field.name))
3864-
case u: UnresolvedFieldPosition => ResolvedFieldPosition(u.position)
3865-
case other => other
3850+
case a @ AlterColumns(table: ResolvedTable, columns, specs) =>
3851+
val resolvedSpecs = columns.zip(specs).map {
3852+
case (ResolvedFieldName(path, field), s @ AlterColumnSpec(dataType, _, _, position, _)) =>
3853+
val newDataType = dataType.flatMap { dt =>
3854+
// Hive style syntax provides the column type, even if it may not have changed.
3855+
val existing = CharVarcharUtils.getRawType(field.metadata).getOrElse(field.dataType)
3856+
if (existing == dt) None else Some(dt)
3857+
}
3858+
val newPosition = position map {
3859+
case u @ UnresolvedFieldPosition(after: After) =>
3860+
// TODO: since the field name is already resolved, it's more efficient if
3861+
// `ResolvedFieldName` carries the parent struct and we resolve column
3862+
// position based on the parent struct, instead of re-resolving the entire
3863+
// column path.
3864+
val resolved = resolveFieldNames(table, path :+ after.column(), u)
3865+
ResolvedFieldPosition(ColumnPosition.after(resolved.field.name))
3866+
case u: UnresolvedFieldPosition => ResolvedFieldPosition(u.position)
3867+
case other => other
3868+
}
3869+
s.copy(dataType = newDataType, position = newPosition)
3870+
case (_, spec) => spec
38663871
}
3867-
val resolved = a.copy(dataType = newDataType, position = newPosition)
3868-
resolved.copyTagsFrom(a)
3869-
resolved
3872+
val resolved = a.copy(specs = resolvedSpecs)
3873+
resolved.copyTagsFrom(a)
3874+
resolved
38703875
}
38713876

38723877
/**

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala

+71-47
Original file line numberDiff line numberDiff line change
@@ -1622,60 +1622,84 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
16221622
case RenameColumn(table: ResolvedTable, col: ResolvedFieldName, newName) =>
16231623
checkColumnNotExists("rename", col.path :+ newName, table.schema)
16241624

1625-
case a @ AlterColumn(table: ResolvedTable, col: ResolvedFieldName, _, _, _, _, _) =>
1626-
val fieldName = col.name.quoted
1627-
if (a.dataType.isDefined) {
1628-
val field = CharVarcharUtils.getRawType(col.field.metadata)
1629-
.map(dt => col.field.copy(dataType = dt))
1630-
.getOrElse(col.field)
1631-
val newDataType = a.dataType.get
1632-
newDataType match {
1633-
case _: StructType => alter.failAnalysis(
1634-
"CANNOT_UPDATE_FIELD.STRUCT_TYPE",
1635-
Map("table" -> toSQLId(table.name), "fieldName" -> toSQLId(fieldName)))
1636-
case _: MapType => alter.failAnalysis(
1637-
"CANNOT_UPDATE_FIELD.MAP_TYPE",
1638-
Map("table" -> toSQLId(table.name), "fieldName" -> toSQLId(fieldName)))
1639-
case _: ArrayType => alter.failAnalysis(
1640-
"CANNOT_UPDATE_FIELD.ARRAY_TYPE",
1641-
Map("table" -> toSQLId(table.name), "fieldName" -> toSQLId(fieldName)))
1642-
case u: UserDefinedType[_] => alter.failAnalysis(
1643-
"CANNOT_UPDATE_FIELD.USER_DEFINED_TYPE",
1644-
Map(
1645-
"table" -> toSQLId(table.name),
1646-
"fieldName" -> toSQLId(fieldName),
1647-
"udtSql" -> toSQLType(u)))
1648-
case _: CalendarIntervalType | _: AnsiIntervalType => alter.failAnalysis(
1649-
"CANNOT_UPDATE_FIELD.INTERVAL_TYPE",
1650-
Map("table" -> toSQLId(table.name), "fieldName" -> toSQLId(fieldName)))
1651-
case _ => // update is okay
1652-
}
1653-
1654-
// We don't need to handle nested types here which shall fail before.
1655-
def canAlterColumnType(from: DataType, to: DataType): Boolean = (from, to) match {
1656-
case (CharType(l1), CharType(l2)) => l1 == l2
1657-
case (CharType(l1), VarcharType(l2)) => l1 <= l2
1658-
case (VarcharType(l1), VarcharType(l2)) => l1 <= l2
1659-
case _ => Cast.canUpCast(from, to)
1660-
}
1661-
if (!canAlterColumnType(field.dataType, newDataType)) {
1625+
case AlterColumns(table: ResolvedTable, columns, specs) =>
1626+
val groupedColumns = columns.groupBy(_.name)
1627+
groupedColumns.collect {
1628+
case (name, occurrences) if occurrences.length > 1 =>
16621629
alter.failAnalysis(
1663-
errorClass = "NOT_SUPPORTED_CHANGE_COLUMN",
1630+
errorClass = "NOT_SUPPORTED_CHANGE_SAME_COLUMN",
16641631
messageParameters = Map(
16651632
"table" -> toSQLId(table.name),
1666-
"originName" -> toSQLId(fieldName),
1667-
"originType" -> toSQLType(field.dataType),
1668-
"newName" -> toSQLId(fieldName),
1669-
"newType" -> toSQLType(newDataType)))
1670-
}
1633+
"fieldName" -> toSQLId(name)))
16711634
}
1672-
if (a.nullable.isDefined) {
1673-
if (!a.nullable.get && col.field.nullable) {
1635+
groupedColumns.keys.foreach { name =>
1636+
val child = groupedColumns.keys.find(child => child != name && child.startsWith(name))
1637+
if (child.isDefined) {
16741638
alter.failAnalysis(
1675-
errorClass = "_LEGACY_ERROR_TEMP_2330",
1676-
messageParameters = Map("fieldName" -> fieldName))
1639+
errorClass = "NOT_SUPPORTED_CHANGE_PARENT_CHILD_COLUMN",
1640+
messageParameters = Map(
1641+
"table" -> toSQLId(table.name),
1642+
"parentField" -> toSQLId(name),
1643+
"childField" -> toSQLId(child.get)))
16771644
}
16781645
}
1646+
columns.zip(specs).foreach {
1647+
case (col: ResolvedFieldName, a @ AlterColumnSpec(_, _, _, _, _)) =>
1648+
val fieldName = col.name.quoted
1649+
if (a.dataType.isDefined) {
1650+
val field = CharVarcharUtils.getRawType(col.field.metadata)
1651+
.map(dt => col.field.copy(dataType = dt))
1652+
.getOrElse(col.field)
1653+
val newDataType = a.dataType.get
1654+
newDataType match {
1655+
case _: StructType => alter.failAnalysis(
1656+
"CANNOT_UPDATE_FIELD.STRUCT_TYPE",
1657+
Map("table" -> toSQLId(table.name), "fieldName" -> toSQLId(fieldName)))
1658+
case _: MapType => alter.failAnalysis(
1659+
"CANNOT_UPDATE_FIELD.MAP_TYPE",
1660+
Map("table" -> toSQLId(table.name), "fieldName" -> toSQLId(fieldName)))
1661+
case _: ArrayType => alter.failAnalysis(
1662+
"CANNOT_UPDATE_FIELD.ARRAY_TYPE",
1663+
Map("table" -> toSQLId(table.name), "fieldName" -> toSQLId(fieldName)))
1664+
case u: UserDefinedType[_] => alter.failAnalysis(
1665+
"CANNOT_UPDATE_FIELD.USER_DEFINED_TYPE",
1666+
Map(
1667+
"table" -> toSQLId(table.name),
1668+
"fieldName" -> toSQLId(fieldName),
1669+
"udtSql" -> toSQLType(u)))
1670+
case _: CalendarIntervalType | _: AnsiIntervalType => alter.failAnalysis(
1671+
"CANNOT_UPDATE_FIELD.INTERVAL_TYPE",
1672+
Map("table" -> toSQLId(table.name), "fieldName" -> toSQLId(fieldName)))
1673+
case _ => // update is okay
1674+
}
1675+
1676+
// We don't need to handle nested types here which shall fail before.
1677+
def canAlterColumnType(from: DataType, to: DataType): Boolean = (from, to) match {
1678+
case (CharType(l1), CharType(l2)) => l1 == l2
1679+
case (CharType(l1), VarcharType(l2)) => l1 <= l2
1680+
case (VarcharType(l1), VarcharType(l2)) => l1 <= l2
1681+
case _ => Cast.canUpCast(from, to)
1682+
}
1683+
if (!canAlterColumnType(field.dataType, newDataType)) {
1684+
alter.failAnalysis(
1685+
errorClass = "NOT_SUPPORTED_CHANGE_COLUMN",
1686+
messageParameters = Map(
1687+
"table" -> toSQLId(table.name),
1688+
"originName" -> toSQLId(fieldName),
1689+
"originType" -> toSQLType(field.dataType),
1690+
"newName" -> toSQLId(fieldName),
1691+
"newType" -> toSQLType(newDataType)))
1692+
}
1693+
}
1694+
if (a.nullable.isDefined) {
1695+
if (!a.nullable.get && col.field.nullable) {
1696+
alter.failAnalysis(
1697+
errorClass = "_LEGACY_ERROR_TEMP_2330",
1698+
messageParameters = Map("fieldName" -> fieldName))
1699+
}
1700+
}
1701+
case _ =>
1702+
}
16791703
case _ =>
16801704
}
16811705
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDefaultStringTypes.scala

+9-5
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
package org.apache.spark.sql.catalyst.analysis
1919

2020
import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, Literal}
21-
import org.apache.spark.sql.catalyst.plans.logical.{AddColumns, AlterColumn, AlterViewAs, ColumnDefinition, CreateView, LogicalPlan, QualifiedColType, ReplaceColumns, V1CreateTablePlan, V2CreateTablePlan}
21+
import org.apache.spark.sql.catalyst.plans.logical.{AddColumns, AlterColumns, AlterColumnSpec, AlterViewAs, ColumnDefinition, CreateView, LogicalPlan, QualifiedColType, ReplaceColumns, V1CreateTablePlan, V2CreateTablePlan}
2222
import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}
2323
import org.apache.spark.sql.types.{DataType, StringType}
2424

@@ -93,7 +93,7 @@ object ResolveDefaultStringTypes extends Rule[LogicalPlan] {
9393
StringType(conf.defaultStringType.collationId)
9494

9595
private def isDDLCommand(plan: LogicalPlan): Boolean = plan exists {
96-
case _: AddColumns | _: ReplaceColumns | _: AlterColumn => true
96+
case _: AddColumns | _: ReplaceColumns | _: AlterColumns => true
9797
case _ => isCreateOrAlterPlan(plan)
9898
}
9999

@@ -115,9 +115,13 @@ object ResolveDefaultStringTypes extends Rule[LogicalPlan] {
115115
case replaceCols: ReplaceColumns =>
116116
replaceCols.copy(columnsToAdd = replaceColumnTypes(replaceCols.columnsToAdd, newType))
117117

118-
case alter: AlterColumn
119-
if alter.dataType.isDefined && hasDefaultStringType(alter.dataType.get) =>
120-
alter.copy(dataType = Some(replaceDefaultStringType(alter.dataType.get, newType)))
118+
case a @ AlterColumns(_, _, specs: Seq[AlterColumnSpec]) =>
119+
val newSpecs = specs.map {
120+
case spec if spec.dataType.isDefined && hasDefaultStringType(spec.dataType.get) =>
121+
spec.copy(dataType = Some(replaceDefaultStringType(spec.dataType.get, newType)))
122+
case col => col
123+
}
124+
a.copy(specs = newSpecs)
121125
}
122126
}
123127

0 commit comments

Comments
 (0)