Skip to content

Commit 13945c8

Browse files
asl3 authored and gengliangwang committed
[SPARK-51657] UTF8_BINARY default table collation shown by default in Desc As JSON (v1)
### What changes were proposed in this pull request?

Display the `UTF8_BINARY` default table collation by default in Desc As JSON for v1 tables. Note that this only affects the `desc as json` output and not `desc table extended` -- desc table does NOT always show the top-level table collation (if omitted in the `desc table` output, it is `UTF8_BINARY`).

### Why are the changes needed?

If the CatalogTable `collation` property is None, then the table collation is `UTF8_BINARY` by default. This update is for completeness of the `desc as json` output.

### Does this PR introduce _any_ user-facing change?

Yes, it affects the JSON result.

### How was this patch tested?

Added tests in `v1/DescribeTableSuite.scala`.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #50451 from asl3/asl3/descasjson-defaultcollation.

Authored-by: Amanda Liu <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
1 parent 52bc072 commit 13945c8

File tree

3 files changed

+36
-27
lines changed

3 files changed

+36
-27
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/command/DescribeRelationJsonCommand.scala

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,10 @@ case class DescribeRelationJsonCommand(
9999
case _ => throw QueryCompilationErrors.describeAsJsonNotSupportedForV2TablesError()
100100
}
101101

102+
// Add default collation if not yet added (addKeyValueToMap only adds unique keys).
103+
// Add here to only affect `DESC AS JSON` and not the `DESC TABLE` output.
104+
addKeyValueToMap("collation", JString("UTF8_BINARY"), jsonMap)
105+
102106
Seq(Row(compact(render(JObject(jsonMap.toList)))))
103107
}
104108

sql/core/src/test/resources/sql-tests/results/describe.sql.out

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ DESCRIBE EXTENDED t AS JSON
7676
-- !query schema
7777
struct<json_metadata:string>
7878
-- !query output
79-
{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"b","type":{"name":"int"},"nullable":true},{"name":"c","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"d","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true}],"num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"location":"file:[not included in comparison]/{warehouse_dir}/t","storage_properties":{"a":"1","b":"2","password":"*********(redacted)"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"parquet","comment":"table_comment","table_properties":{"e":"3","password":"*********(redacted)","t":"test"},"partition_provider":"Catalog","partition_columns":["c","d"]}
79+
{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"b","type":{"name":"int"},"nullable":true},{"name":"c","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"d","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true}],"num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"location":"file:[not included in comparison]/{warehouse_dir}/t","storage_properties":{"a":"1","b":"2","password":"*********(redacted)"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"parquet","comment":"table_comment","table_properties":{"e":"3","password":"*********(redacted)","t":"test"},"partition_provider":"Catalog","partition_columns":["c","d"],"collation":"UTF8_BINARY"}
8080

8181

8282
-- !query
@@ -303,7 +303,7 @@ DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON
303303
-- !query schema
304304
struct<json_metadata:string>
305305
-- !query output
306-
{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"b","type":{"name":"int"},"nullable":true},{"name":"c","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"d","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true}],"partition_values":{"c":"Us","d":"1"},"location":"file:[not included in comparison]/{warehouse_dir}/t/c=Us/d=1","storage_properties":{"a":"1","b":"2","password":"*********(redacted)"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"parquet","num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"table_properties":{"password":"*********(redacted)","t":"test"},"partition_provider":"Catalog","partition_columns":["c","d"]}
306+
{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"b","type":{"name":"int"},"nullable":true},{"name":"c","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"d","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true}],"partition_values":{"c":"Us","d":"1"},"location":"file:[not included in comparison]/{warehouse_dir}/t/c=Us/d=1","storage_properties":{"a":"1","b":"2","password":"*********(redacted)"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"parquet","num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"table_properties":{"password":"*********(redacted)","t":"test"},"partition_provider":"Catalog","partition_columns":["c","d"],"collation":"UTF8_BINARY"}
307307

308308

309309
-- !query
@@ -929,7 +929,7 @@ DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS
929929
-- !query schema
930930
struct<json_metadata:string>
931931
-- !query output
932-
{"table_name":"f","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"A","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"B","type":{"name":"binary"},"nullable":true},{"name":"C","type":{"name":"timestamp_ltz"},"nullable":true}],"partition_values":{"B":"SPARK","C":"2018-11-17 13:33:33"},"location":"file:[not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33","partition_parameters":{"numFiles":"1","totalSize":"15","transient_lastDdlTime [not included in comparison]":"None"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"json","partition_provider":"Catalog","partition_columns":["B","C"]}
932+
{"table_name":"f","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"A","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"B","type":{"name":"binary"},"nullable":true},{"name":"C","type":{"name":"timestamp_ltz"},"nullable":true}],"partition_values":{"B":"SPARK","C":"2018-11-17 13:33:33"},"location":"file:[not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33","partition_parameters":{"numFiles":"1","totalSize":"15","transient_lastDdlTime [not included in comparison]":"None"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"json","partition_provider":"Catalog","partition_columns":["B","C"],"collation":"UTF8_BINARY"}
933933

934934

935935
-- !query

sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala

Lines changed: 29 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -273,6 +273,7 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
273273
last_access = Some("UNKNOWN"),
274274
created_by = Some(s"Spark $SPARK_VERSION"),
275275
`type` = Some("MANAGED"),
276+
collation = Some("UTF8_BINARY"),
276277
provider = Some("parquet"),
277278
bucket_columns = Some(List("employee_id")),
278279
sort_columns = Some(List("employee_name")),
@@ -338,6 +339,7 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
338339
last_access = Some("UNKNOWN"),
339340
created_by = Some(s"Spark $SPARK_VERSION"),
340341
`type` = Some("MANAGED"),
342+
collation = Some("UTF8_BINARY"),
341343
provider = Some("parquet"),
342344
bucket_columns = Some(Nil),
343345
sort_columns = Some(Nil),
@@ -402,6 +404,7 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
402404
last_access = Some("UNKNOWN"),
403405
created_by = Some(s"Spark $SPARK_VERSION"),
404406
`type` = Some("MANAGED"),
407+
collation = Some("UTF8_BINARY"),
405408
provider = Some("parquet"),
406409
bucket_columns = Some(Nil),
407410
sort_columns = Some(Nil),
@@ -437,7 +440,7 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
437440
| id INT
438441
|)
439442
|USING parquet COMMENT 'table_comment'
440-
|DEFAULT COLLATION UTF8_BINARY
443+
|DEFAULT COLLATION DE
441444
|""".stripMargin
442445
spark.sql(tableCreationStr)
443446

@@ -455,12 +458,12 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
455458
TableColumn("c1", Type("string", collation = Some("UNICODE_CI"))),
456459
TableColumn("c2", Type("string", collation = Some("UNICODE_RTRIM"))),
457460
TableColumn("c3", Type("string", collation = Some("fr"))),
458-
TableColumn("c4", Type("string", collation = Some("UTF8_BINARY"))),
461+
TableColumn("c4", Type("string", collation = Some("de"))),
459462
TableColumn("id", Type("int")))),
460463
last_access = Some("UNKNOWN"),
461464
created_by = Some(s"Spark $SPARK_VERSION"),
462465
`type` = Some("MANAGED"),
463-
collation = Some("UTF8_BINARY"),
466+
collation = Some("de"),
464467
storage_properties = None,
465468
provider = Some("parquet"),
466469
bucket_columns = Some(Nil),
@@ -514,6 +517,7 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
514517
last_access = Some("UNKNOWN"),
515518
created_by = Some(s"Spark $SPARK_VERSION"),
516519
`type` = Some("MANAGED"),
520+
collation = Some("UTF8_BINARY"),
517521
storage_properties = None,
518522
provider = Some("parquet"),
519523
bucket_columns = Some(Nil),
@@ -557,27 +561,27 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
557561
val jsonValue = firstRow.getString(0)
558562
val parsedOutput = parse(jsonValue).extract[DescribeTableJson]
559563

560-
val expectedOutput = DescribeTableJson(
561-
table_name = Some("view"),
562-
catalog_name = if (isTemp) Some("system") else Some("spark_catalog"),
563-
namespace = if (isTemp) Some(List("session")) else Some(List("default")),
564-
schema_name = if (isTemp) Some("session") else Some("default"),
565-
columns = Some(List(
566-
TableColumn("id", Type("int")),
567-
TableColumn("name", Type("string", collation = Some("UTF8_BINARY"))),
568-
TableColumn("created_at", Type("timestamp_ltz"))
569-
)),
570-
last_access = Some("UNKNOWN"),
571-
created_by = Some(s"Spark $SPARK_VERSION"),
572-
`type` = Some("VIEW"),
573-
view_text = Some("SELECT * FROM spark_catalog.ns.table"),
574-
view_original_text =
575-
if (isTemp) None else Some("SELECT * FROM spark_catalog.ns.table"),
576-
// TODO: this is unexpected and temp view should also use COMPENSATION mode.
577-
view_schema_mode = if (isTemp) Some("BINDING") else Some("COMPENSATION"),
578-
view_catalog_and_namespace = Some("spark_catalog.default"),
579-
view_query_output_columns = Some(List("id", "name", "created_at"))
580-
)
564+
val expectedOutput = DescribeTableJson(
565+
table_name = Some("view"),
566+
catalog_name = if (isTemp) Some("system") else Some("spark_catalog"),
567+
namespace = if (isTemp) Some(List("session")) else Some(List("default")),
568+
schema_name = if (isTemp) Some("session") else Some("default"),
569+
columns = Some(List(
570+
TableColumn("id", Type("int")),
571+
TableColumn("name", Type("string", collation = Some("UTF8_BINARY"))),
572+
TableColumn("created_at", Type("timestamp_ltz"))
573+
)),
574+
last_access = Some("UNKNOWN"),
575+
created_by = Some(s"Spark $SPARK_VERSION"),
576+
`type` = Some("VIEW"),
577+
collation = Some("UTF8_BINARY"),
578+
view_text = Some("SELECT * FROM spark_catalog.ns.table"),
579+
view_original_text = if (isTemp) None else Some("SELECT * FROM spark_catalog.ns.table"),
580+
// TODO: this is unexpected and temp view should also use COMPENSATION mode.
581+
view_schema_mode = if (isTemp) Some("BINDING") else Some("COMPENSATION"),
582+
view_catalog_and_namespace = Some("spark_catalog.default"),
583+
view_query_output_columns = Some(List("id", "name", "created_at"))
584+
)
581585

582586
assert(iso8601Regex.matches(parsedOutput.created_time.get))
583587
assert(expectedOutput == parsedOutput.copy(
@@ -763,6 +767,7 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
763767
last_access = Some("UNKNOWN"),
764768
created_by = Some(s"Spark $SPARK_VERSION"),
765769
`type` = Some("MANAGED"),
770+
collation = Some("UTF8_BINARY"),
766771
provider = Some("parquet"),
767772
comment = Some("A table with nested complex types"),
768773
table_properties = Some(Map(

0 commit comments

Comments
 (0)