Skip to content

Commit f3e1623

Browse files
committed
[SPARK-46543][PYTHON][CONNECT] Make json_tuple throw PySparkValueError for empty fields
### What changes were proposed in this pull request?
Make `json_tuple` throw PySparkValueError for empty fields.

### Why are the changes needed?
The Python side should have the same check as the Scala side:
https://github.com/apache/spark/blob/fa4096eb6aba4c66f0d9c5dcbabdfc0804064fff/sql/core/src/main/scala/org/apache/spark/sql/functions.scala#L6330-L6334

### Does this PR introduce _any_ user-facing change?
Yes.

### How was this patch tested?
Added a unit test.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #44534 from zhengruifeng/py_check_functions.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
1 parent e608211 commit f3e1623

File tree

3 files changed

+24
-0
lines changed

3 files changed

+24
-0
lines changed

python/pyspark/sql/connect/functions/builtin.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1928,6 +1928,11 @@ def inline_outer(col: "ColumnOrName") -> Column:
19281928

19291929

19301930
def json_tuple(col: "ColumnOrName", *fields: str) -> Column:
    """Connect-side ``json_tuple``: forward the column and the given JSON field
    names to the server-side function.

    Raises
    ------
    PySparkValueError
        If no field names are supplied (mirrors the Scala-side check).
    """
    # Reject an empty field list up front rather than failing on the server.
    if not fields:
        raise PySparkValueError(
            error_class="CANNOT_BE_EMPTY",
            message_parameters={"item": "field"},
        )
    field_columns = [lit(name) for name in fields]
    return _invoke_function("json_tuple", _to_col(col), *field_columns)
19321937

19331938

python/pyspark/sql/functions/builtin.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14246,6 +14246,11 @@ def json_tuple(col: "ColumnOrName", *fields: str) -> Column:
1424614246
>>> df.select(df.key, json_tuple(df.jstring, 'f1', 'f2')).collect()
1424714247
[Row(key='1', c0='value1', c1='value2'), Row(key='2', c0='value12', c1=None)]
1424814248
"""
14249+
if len(fields) == 0:
14250+
raise PySparkValueError(
14251+
error_class="CANNOT_BE_EMPTY",
14252+
message_parameters={"item": "field"},
14253+
)
1424914254
sc = _get_active_spark_context()
1425014255
return _invoke_function("json_tuple", _to_java_column(col), _to_seq(sc, fields))
1425114256

python/pyspark/sql/tests/test_functions.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1452,6 +1452,20 @@ def test_current_timestamp(self):
14521452
self.assertIsInstance(df.first()[0], datetime.datetime)
14531453
self.assertEqual(df.schema.names[0], "now()")
14541454

1455+
def test_json_tuple_empty_fields(self):
    # Calling json_tuple with no fields must raise PySparkValueError,
    # matching the equivalent check on the Scala side.
    rows = [
        ("1", """{"f1": "value1", "f2": "value2"}"""),
        ("2", """{"f1": "value12"}"""),
    ]
    df = self.spark.createDataFrame(rows, ("key", "jstring"))
    self.assertRaisesRegex(
        PySparkValueError,
        "At least one field must be specified",
        lambda: df.select(F.json_tuple(df.jstring)),
    )
1468+
14551469

14561470
class FunctionsTests(ReusedSQLTestCase, FunctionsTestsMixin):
14571471
pass

0 commit comments

Comments (0)