Skip to content

Commit 9b08924

Browse files
asl3, gengliangwang
authored and committed
[SPARK-51657] UTF8_BINARY default table collation shown by default in Desc As JSON (v1)
### What changes were proposed in this pull request? Display the `UTF8_BINARY` default table collation by default in Desc As JSON for v1 tables. Note this only affects the `desc as json` output and not `desc table extended` -- `desc table` does NOT always show the top-level table collation (if omitted in the `desc table` output, it is `UTF8_BINARY`). ### Why are the changes needed? If the CatalogTable `collation` property is None, then the collation is `UTF8_BINARY` by default. This update is for completeness of the `desc as json` output. ### Does this PR introduce _any_ user-facing change? Yes, it affects the JSON result. ### How was this patch tested? Added tests in `v1/DescribeTableSuite.scala`. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #50451 from asl3/asl3/descasjson-defaultcollation. Authored-by: Amanda Liu <[email protected]> Signed-off-by: Gengliang Wang <[email protected]> (cherry picked from commit 13945c8) Signed-off-by: Gengliang Wang <[email protected]>
1 parent e106766 commit 9b08924

File tree

3 files changed

+36
-27
lines changed

3 files changed

+36
-27
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/command/DescribeRelationJsonCommand.scala

+4
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,10 @@ case class DescribeRelationJsonCommand(
9999
case _ => throw QueryCompilationErrors.describeAsJsonNotSupportedForV2TablesError()
100100
}
101101

102+
// Add default collation if not yet added (addKeyValueToMap only adds unique keys).
103+
// Add here to only affect `DESC AS JSON` and not the `DESC TABLE` output.
104+
addKeyValueToMap("collation", JString("UTF8_BINARY"), jsonMap)
105+
102106
Seq(Row(compact(render(JObject(jsonMap.toList)))))
103107
}
104108

sql/core/src/test/resources/sql-tests/results/describe.sql.out

+3-3
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ DESCRIBE EXTENDED t AS JSON
7676
-- !query schema
7777
struct<json_metadata:string>
7878
-- !query output
79-
{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"b","type":{"name":"int"},"nullable":true},{"name":"c","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"d","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true}],"num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"location":"file:[not included in comparison]/{warehouse_dir}/t","storage_properties":{"a":"1","b":"2","password":"*********(redacted)"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"parquet","comment":"table_comment","table_properties":{"e":"3","password":"*********(redacted)","t":"test"},"partition_provider":"Catalog","partition_columns":["c","d"]}
79+
{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"b","type":{"name":"int"},"nullable":true},{"name":"c","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"d","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true}],"num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"location":"file:[not included in comparison]/{warehouse_dir}/t","storage_properties":{"a":"1","b":"2","password":"*********(redacted)"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"parquet","comment":"table_comment","table_properties":{"e":"3","password":"*********(redacted)","t":"test"},"partition_provider":"Catalog","partition_columns":["c","d"],"collation":"UTF8_BINARY"}
8080

8181

8282
-- !query
@@ -303,7 +303,7 @@ DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON
303303
-- !query schema
304304
struct<json_metadata:string>
305305
-- !query output
306-
{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"b","type":{"name":"int"},"nullable":true},{"name":"c","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"d","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true}],"partition_values":{"c":"Us","d":"1"},"location":"file:[not included in comparison]/{warehouse_dir}/t/c=Us/d=1","storage_properties":{"a":"1","b":"2","password":"*********(redacted)"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"parquet","num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"table_properties":{"password":"*********(redacted)","t":"test"},"partition_provider":"Catalog","partition_columns":["c","d"]}
306+
{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"b","type":{"name":"int"},"nullable":true},{"name":"c","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"d","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true}],"partition_values":{"c":"Us","d":"1"},"location":"file:[not included in comparison]/{warehouse_dir}/t/c=Us/d=1","storage_properties":{"a":"1","b":"2","password":"*********(redacted)"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"parquet","num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"table_properties":{"password":"*********(redacted)","t":"test"},"partition_provider":"Catalog","partition_columns":["c","d"],"collation":"UTF8_BINARY"}
307307

308308

309309
-- !query
@@ -929,7 +929,7 @@ DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS
929929
-- !query schema
930930
struct<json_metadata:string>
931931
-- !query output
932-
{"table_name":"f","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"A","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"B","type":{"name":"binary"},"nullable":true},{"name":"C","type":{"name":"timestamp_ltz"},"nullable":true}],"partition_values":{"B":"SPARK","C":"2018-11-17 13:33:33"},"location":"file:[not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33","partition_parameters":{"numFiles":"1","totalSize":"15","transient_lastDdlTime [not included in comparison]":"None"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"json","partition_provider":"Catalog","partition_columns":["B","C"]}
932+
{"table_name":"f","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"A","type":{"name":"string","collation":"UTF8_BINARY"},"nullable":true},{"name":"B","type":{"name":"binary"},"nullable":true},{"name":"C","type":{"name":"timestamp_ltz"},"nullable":true}],"partition_values":{"B":"SPARK","C":"2018-11-17 13:33:33"},"location":"file:[not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33","partition_parameters":{"numFiles":"1","totalSize":"15","transient_lastDdlTime [not included in comparison]":"None"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"json","partition_provider":"Catalog","partition_columns":["B","C"],"collation":"UTF8_BINARY"}
933933

934934

935935
-- !query

sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala

+29-24
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
248248
last_access = Some("UNKNOWN"),
249249
created_by = Some(s"Spark $SPARK_VERSION"),
250250
`type` = Some("MANAGED"),
251+
collation = Some("UTF8_BINARY"),
251252
provider = Some("parquet"),
252253
bucket_columns = Some(List("employee_id")),
253254
sort_columns = Some(List("employee_name")),
@@ -313,6 +314,7 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
313314
last_access = Some("UNKNOWN"),
314315
created_by = Some(s"Spark $SPARK_VERSION"),
315316
`type` = Some("MANAGED"),
317+
collation = Some("UTF8_BINARY"),
316318
provider = Some("parquet"),
317319
bucket_columns = Some(Nil),
318320
sort_columns = Some(Nil),
@@ -377,6 +379,7 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
377379
last_access = Some("UNKNOWN"),
378380
created_by = Some(s"Spark $SPARK_VERSION"),
379381
`type` = Some("MANAGED"),
382+
collation = Some("UTF8_BINARY"),
380383
provider = Some("parquet"),
381384
bucket_columns = Some(Nil),
382385
sort_columns = Some(Nil),
@@ -412,7 +415,7 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
412415
| id INT
413416
|)
414417
|USING parquet COMMENT 'table_comment'
415-
|DEFAULT COLLATION UTF8_BINARY
418+
|DEFAULT COLLATION DE
416419
|""".stripMargin
417420
spark.sql(tableCreationStr)
418421

@@ -430,12 +433,12 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
430433
TableColumn("c1", Type("string", collation = Some("UNICODE_CI"))),
431434
TableColumn("c2", Type("string", collation = Some("UNICODE_RTRIM"))),
432435
TableColumn("c3", Type("string", collation = Some("fr"))),
433-
TableColumn("c4", Type("string", collation = Some("UTF8_BINARY"))),
436+
TableColumn("c4", Type("string", collation = Some("de"))),
434437
TableColumn("id", Type("int")))),
435438
last_access = Some("UNKNOWN"),
436439
created_by = Some(s"Spark $SPARK_VERSION"),
437440
`type` = Some("MANAGED"),
438-
collation = Some("UTF8_BINARY"),
441+
collation = Some("de"),
439442
storage_properties = None,
440443
provider = Some("parquet"),
441444
bucket_columns = Some(Nil),
@@ -489,6 +492,7 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
489492
last_access = Some("UNKNOWN"),
490493
created_by = Some(s"Spark $SPARK_VERSION"),
491494
`type` = Some("MANAGED"),
495+
collation = Some("UTF8_BINARY"),
492496
storage_properties = None,
493497
provider = Some("parquet"),
494498
bucket_columns = Some(Nil),
@@ -532,27 +536,27 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
532536
val jsonValue = firstRow.getString(0)
533537
val parsedOutput = parse(jsonValue).extract[DescribeTableJson]
534538

535-
val expectedOutput = DescribeTableJson(
536-
table_name = Some("view"),
537-
catalog_name = if (isTemp) Some("system") else Some("spark_catalog"),
538-
namespace = if (isTemp) Some(List("session")) else Some(List("default")),
539-
schema_name = if (isTemp) Some("session") else Some("default"),
540-
columns = Some(List(
541-
TableColumn("id", Type("int")),
542-
TableColumn("name", Type("string", collation = Some("UTF8_BINARY"))),
543-
TableColumn("created_at", Type("timestamp_ltz"))
544-
)),
545-
last_access = Some("UNKNOWN"),
546-
created_by = Some(s"Spark $SPARK_VERSION"),
547-
`type` = Some("VIEW"),
548-
view_text = Some("SELECT * FROM spark_catalog.ns.table"),
549-
view_original_text =
550-
if (isTemp) None else Some("SELECT * FROM spark_catalog.ns.table"),
551-
// TODO: this is unexpected and temp view should also use COMPENSATION mode.
552-
view_schema_mode = if (isTemp) Some("BINDING") else Some("COMPENSATION"),
553-
view_catalog_and_namespace = Some("spark_catalog.default"),
554-
view_query_output_columns = Some(List("id", "name", "created_at"))
555-
)
539+
val expectedOutput = DescribeTableJson(
540+
table_name = Some("view"),
541+
catalog_name = if (isTemp) Some("system") else Some("spark_catalog"),
542+
namespace = if (isTemp) Some(List("session")) else Some(List("default")),
543+
schema_name = if (isTemp) Some("session") else Some("default"),
544+
columns = Some(List(
545+
TableColumn("id", Type("int")),
546+
TableColumn("name", Type("string", collation = Some("UTF8_BINARY"))),
547+
TableColumn("created_at", Type("timestamp_ltz"))
548+
)),
549+
last_access = Some("UNKNOWN"),
550+
created_by = Some(s"Spark $SPARK_VERSION"),
551+
`type` = Some("VIEW"),
552+
collation = Some("UTF8_BINARY"),
553+
view_text = Some("SELECT * FROM spark_catalog.ns.table"),
554+
view_original_text = if (isTemp) None else Some("SELECT * FROM spark_catalog.ns.table"),
555+
// TODO: this is unexpected and temp view should also use COMPENSATION mode.
556+
view_schema_mode = if (isTemp) Some("BINDING") else Some("COMPENSATION"),
557+
view_catalog_and_namespace = Some("spark_catalog.default"),
558+
view_query_output_columns = Some(List("id", "name", "created_at"))
559+
)
556560

557561
assert(iso8601Regex.matches(parsedOutput.created_time.get))
558562
assert(expectedOutput == parsedOutput.copy(
@@ -738,6 +742,7 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
738742
last_access = Some("UNKNOWN"),
739743
created_by = Some(s"Spark $SPARK_VERSION"),
740744
`type` = Some("MANAGED"),
745+
collation = Some("UTF8_BINARY"),
741746
provider = Some("parquet"),
742747
comment = Some("A table with nested complex types"),
743748
table_properties = Some(Map(

0 commit comments

Comments
 (0)