[SPARK-49567][PYTHON] Use classic instead of vanilla from PySpark code base

### What changes were proposed in this pull request?

This PR proposes to use `classic` instead of `vanilla` throughout the PySpark code base.

### Why are the changes needed?

To unify the terminology across the code base, including the docs, and avoid confusion.

### Does this PR introduce _any_ user-facing change?

No API changes, but the user-facing `pyspark.sql.DataFrame.offset` docs now use the term `classic` instead of `vanilla`.
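
For reference, a minimal sketch of how `pyspark.sql.DataFrame.offset` is used (illustrative only; assumes a local `SparkSession`, and that `offset` is available, i.e. PySpark 3.5+ for classic PySpark or 3.4+ with Spark Connect):

```python
from pyspark.sql import SparkSession

# Assumed local session; behaves the same on classic PySpark and Spark Connect.
spark = SparkSession.builder.getOrCreate()

df = spark.range(10)   # rows with id 0..9
df.offset(3).show()    # skips the first 3 rows, leaving ids 3..9
```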

### How was this patch tested?

CI

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #48044 from itholic/vanilla_classic.

Authored-by: Haejoon Lee <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
itholic authored and HyukjinKwon committed Sep 10, 2024
1 parent b0c5642 commit a69c5ea
Showing 6 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/dataframe.py
@@ -1783,7 +1783,7 @@ def __getitem__(
)
)
else:
- # TODO: revisit vanilla Spark's Dataset.col
+ # TODO: revisit classic Spark's Dataset.col
# if (sparkSession.sessionState.conf.supportQuotedRegexColumnName) {
# colRegex(colName)
# } else {
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/group.py
@@ -101,7 +101,7 @@ def __init__(

def __repr__(self) -> str:
# the expressions are not resolved here,
- # so the string representation can be different from vanilla PySpark.
+ # so the string representation can be different from classic PySpark.
grouping_str = ", ".join(str(e._expr) for e in self._grouping_cols)
grouping_str = f"grouping expressions: [{grouping_str}]"

2 changes: 1 addition & 1 deletion python/pyspark/sql/dataframe.py
@@ -1332,7 +1332,7 @@ def offset(self, num: int) -> "DataFrame":
.. versionadded:: 3.4.0
.. versionchanged:: 3.5.0
- Supports vanilla PySpark.
+ Supports classic PySpark.
Parameters
----------
@@ -176,7 +176,7 @@ def test_slow_query(self):

def test_listener_throw(self):
"""
- Following Vanilla Spark's behavior, when the callback of user-defined listener throws,
+ Following classic Spark's behavior, when the callback of user-defined listener throws,
other listeners should still proceed.
"""

6 changes: 3 additions & 3 deletions python/pyspark/sql/tests/connect/test_connect_function.py
@@ -2572,7 +2572,7 @@ def test_function_parity(self):

cf_fn = {name for (name, value) in getmembers(CF, isfunction) if name[0] != "_"}

- # Functions in vanilla PySpark we do not expect to be available in Spark Connect
+ # Functions in classic PySpark we do not expect to be available in Spark Connect
sf_excluded_fn = set()

self.assertEqual(
@@ -2581,15 +2581,15 @@ def test_function_parity(self):
"Missing functions in Spark Connect not as expected",
)

- # Functions in Spark Connect we do not expect to be available in vanilla PySpark
+ # Functions in Spark Connect we do not expect to be available in classic PySpark
cf_excluded_fn = {
"check_dependencies", # internal helper function
}

self.assertEqual(
cf_fn - sf_fn,
cf_excluded_fn,
"Missing functions in vanilla PySpark not as expected",
"Missing functions in classic PySpark not as expected",
)

# SPARK-45216: Fix non-deterministic seeded Dataset APIs
2 changes: 1 addition & 1 deletion python/pyspark/testing/utils.py
@@ -185,7 +185,7 @@ def setUpClass(cls):
def tearDownClass(cls):
cls.sc.stop()

- def test_assert_vanilla_mode(self):
+ def test_assert_classic_mode(self):
from pyspark.sql import is_remote

self.assertFalse(is_remote())
