[SPARK-51307][SQL] locationUri in CatalogStorageFormat shall be decoded for display

### What changes were proposed in this pull request?

This PR uses `CatalogUtils.URIToString` instead of `URI.toString` when rendering the location URI, so the percent-escapes introduced by the string-to-URI conversion are decoded before display.

### Why are the changes needed?

For partition specs such as `test1=X'16'` and `test3=timestamp'2018-11-17 13:33:33'`, the stored path escapes the special characters in the partition values, so it contains `test1=%16/test3=2018-11-17 13%3A33%3A33`. When the whole path string is later resolved into a URI object, the `%` characters are escaped again and this fragment becomes `test1=%2516/test3=2018-11-17 13%253A33%253A33`, so we need to decode `%25` back to `%` before displaying the location to users.
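To make the double encoding concrete, here is a minimal sketch using plain `java.net.URI`. It approximates the behaviour described above rather than Spark's actual `CatalogUtils`/Hadoop `Path` code path, and the warehouse path is invented for illustration.

```scala
import java.net.URI

// The partition directory name on disk already percent-escapes the colon
// (":" was written as "%3A" when the partition path was created).
val storedPath = "/warehouse/f/C=2018-11-17 13%3A33%3A33"

// Building a URI from that string escapes the "%" itself ("%" -> "%25"),
// so the raw URI string is now doubly encoded.
val uri = new URI("file", null, storedPath, null)
println(uri.toString)  // file:/warehouse/f/C=2018-11-17%2013%253A33%253A33

// Decoding one level (what the fix does before display) recovers the stored form.
println(uri.getPath)   // /warehouse/f/C=2018-11-17 13%3A33%3A33
```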

### Does this PR introduce _any_ user-facing change?
Yes. `DESC TABLE` will no longer show doubly-encoded paths.

### How was this patch tested?
New tests.

### Was this patch authored or co-authored using generative AI tooling?
no

Closes #50074 from yaooqinn/SPARK-51307.

Authored-by: Kent Yao <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
yaooqinn authored and dongjoon-hyun committed Mar 4, 2025
1 parent 229be37 commit eb71443
Showing 5 changed files with 90 additions and 1 deletion.
@@ -156,7 +156,7 @@ case class CatalogStorageFormat(
def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = {
val map = mutable.LinkedHashMap[String, JValue]()

locationUri.foreach(l => map += ("Location" -> JString(l.toString)))
locationUri.foreach(l => map += ("Location" -> JString(CatalogUtils.URIToString(l))))
serde.foreach(s => map += ("Serde Library" -> JString(s)))
inputFormat.foreach(format => map += ("InputFormat" -> JString(format)))
outputFormat.foreach(format => map += ("OutputFormat" -> JString(format)))
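For context, a rough sketch of what the one-line change above achieves. It assumes, as an approximation rather than a quote of the Spark source, that `CatalogUtils.URIToString` amounts to a round-trip through Hadoop's `Path`; it needs `hadoop-common` on the classpath and uses a made-up location.

```scala
import org.apache.hadoop.fs.Path

// A location URI as Spark would store it: building the URI escapes "%" and the
// space, so its string form is doubly encoded.
val location = new Path("/warehouse/f/C=2018-11-17 13%3A33%3A33").toUri

// Old behaviour: URI.toString keeps that extra escaping.
println(location.toString)            // /warehouse/f/C=2018-11-17%2013%253A33%253A33

// New behaviour (sketched): decode once so users see the on-disk partition path.
println(new Path(location).toString)  // /warehouse/f/C=2018-11-17 13%3A33%3A33
```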
@@ -393,6 +393,27 @@ DESC FORMATTED e
DescribeTableCommand `spark_catalog`.`default`.`e`, true, [col_name#x, data_type#x, comment#x]


-- !query
CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C
-- !query analysis
CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`f`, ErrorIfExists, [A, B, C]
+- Project [APACHE AS A#x, cast(SPARK as binary) AS B#x, 2018-11-17 13:33:33 AS C#x]
+- OneRowRelation


-- !query
DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33')
-- !query analysis
DescribeTableCommand `spark_catalog`.`default`.`f`, [B=SPARK, C=2018-11-17 13:33:33], true, [col_name#x, data_type#x, comment#x]


-- !query
DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON
-- !query analysis
DescribeRelationJsonCommand [B=SPARK, C=2018-11-17 13:33:33], true, [json_metadata#x]
+- ResolvedTable V2SessionCatalog(spark_catalog), default.f, V1Table(default.f), [A#x, B#x, C#x]


-- !query
DROP VIEW temp_v
-- !query analysis
@@ -430,3 +451,10 @@ DROP TABLE e
-- !query analysis
DropTable false, false
+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.e


-- !query
DROP TABLE f
-- !query analysis
DropTable false, false
+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.f
8 changes: 8 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/describe.sql
@@ -122,6 +122,12 @@ DESC TABLE EXTENDED e;

DESC FORMATTED e;

CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C;

DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33');

DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON;

-- DROP TEST TABLES/VIEWS

DROP VIEW temp_v;
@@ -135,3 +141,5 @@ DROP TABLE t;
DROP TABLE d;

DROP TABLE e;

DROP TABLE f;
50 changes: 50 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -890,6 +890,48 @@ a string CONCAT('a\n b\n ', 'c\n d')
b int 42


-- !query
CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C
-- !query schema
struct<>
-- !query output



-- !query
DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33')
-- !query schema
struct<col_name:string,data_type:string,comment:string>
-- !query output
A string
B binary
C timestamp
# Partition Information
# col_name data_type comment
B binary
C timestamp

# Detailed Partition Information
Database default
Table f
Partition Values [B=SPARK, C=2018-11-17 13:33:33]
Location [not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33
Partition Parameters [numFiles=1, totalSize=15, transient_lastDdlTime=[not included in comparison]]
Created Time [not included in comparison]
Last Access [not included in comparison]

# Storage Information
Location [not included in comparison]/{warehouse_dir}/f


-- !query
DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON
-- !query schema
struct<json_metadata:string>
-- !query output
{"table_name":"f","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"A","type":{"name":"string"},"nullable":true},{"name":"B","type":{"name":"binary"},"nullable":true},{"name":"C","type":{"name":"timestamp_ltz"},"nullable":true}],"partition_values":{"B":"SPARK","C":"2018-11-17 13:33:33"},"location":"file:[not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33","partition_parameters":{"numFiles":"1","totalSize":"15","transient_lastDdlTime [not included in comparison]":"None"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"json","partition_provider":"Catalog","partition_columns":["B","C"]}
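As a usage sketch of the user-facing effect, the decoded location can be checked from the JSON metadata like this. This is a hypothetical snippet: it assumes a running `SparkSession` and the table `f` created above.

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[1]").getOrCreate()

// Fetch the JSON metadata for the partition and inspect its location field.
val json = spark.sql(
  "DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON"
).head().getString(0)

// With the fix, the location is escaped only once ("%3A"), not twice ("%253A").
assert(json.contains("13%3A33%3A33") && !json.contains("%253A"))
```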


-- !query
DROP VIEW temp_v
-- !query schema
@@ -936,3 +978,11 @@ DROP TABLE e
struct<>
-- !query output



-- !query
DROP TABLE f
-- !query schema
struct<>
-- !query output

@@ -66,6 +66,9 @@ trait SQLQueryTestHelper extends Logging {
s""""location": "$notIncludedMsg/{warehouse_dir}/""")
.replaceAll(s""""created_by":".*?"""", s""""created_by $notIncludedMsg":"None"""")
.replaceAll(s""""created_time":".*?"""", s""""created_time $notIncludedMsg":"None"""")
.replaceAll(s"transient_lastDdlTime=\\d+", s"transient_lastDdlTime=$notIncludedMsg")
.replaceAll(s""""transient_lastDdlTime":"\\d+"""",
s""""transient_lastDdlTime $notIncludedMsg":"None"""")
.replaceAll(s""""last_access":".*?"""", s""""last_access $notIncludedMsg":"None"""")
.replaceAll(s""""owner":".*?"""", s""""owner $notIncludedMsg":"None"""")
.replaceAll(s""""partition_statistics":"\\d+"""",
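To illustrate the two normalization rules added to `SQLQueryTestHelper` above, a small standalone sketch; the sample strings and the epoch value are hypothetical, not taken from real golden files.

```scala
// Mask the unstable transient_lastDdlTime epoch before golden-file comparison,
// mirroring the two .replaceAll rules in the patch.
val notIncludedMsg = "[not included in comparison]"

val plainText =
  "Partition Parameters [numFiles=1, totalSize=15, transient_lastDdlTime=1741059213]"
val jsonText =
  """{"partition_parameters":{"numFiles":"1","transient_lastDdlTime":"1741059213"}}"""

println(plainText.replaceAll("transient_lastDdlTime=\\d+",
  s"transient_lastDdlTime=$notIncludedMsg"))
// ... transient_lastDdlTime=[not included in comparison]]

println(jsonText.replaceAll(""""transient_lastDdlTime":"\d+"""",
  s""""transient_lastDdlTime $notIncludedMsg":"None""""))
// {"partition_parameters":{"numFiles":"1","transient_lastDdlTime [not included in comparison]":"None"}}
```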
