From e95e820ac137b250f234080b4ece13fb66477295 Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng <ruifengz@apache.org>
Date: Wed, 31 Jan 2024 16:08:14 +0800
Subject: [PATCH] [SPARK-46932] Clean up the imports in `pyspark.pandas.test_*`

### What changes were proposed in this pull request?
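1. Remove unused imports and variables.
2. Avoid duplicate definitions of variables: the `pdf`/`psdf` properties are now defined once in the test mixins rather than being redefined in the Connect parity test classes.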

### Why are the changes needed?
Code cleanup.

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
Existing tests, run in CI.

### Was this patch authored or co-authored using generative AI tooling?
no

Closes #44967 from zhengruifeng/ps_test_cleanup.

Authored-by: Ruifeng Zheng <ruifengz@apache.org>
Signed-off-by: Ruifeng Zheng <ruifengz@apache.org>
---
 .../tests/connect/test_parity_categorical.py  | 10 ++++-----
 .../tests/connect/test_parity_extension.py    | 21 ++++++-------------
 .../tests/connect/test_parity_numpy_compat.py | 20 ++++++------------
 .../pyspark/pandas/tests/test_categorical.py  | 12 +++++++++--
 python/pyspark/pandas/tests/test_extension.py | 11 ++++++++--
 .../pyspark/pandas/tests/test_numpy_compat.py | 12 +++++++++--
 6 files changed, 46 insertions(+), 40 deletions(-)

diff --git a/python/pyspark/pandas/tests/connect/test_parity_categorical.py b/python/pyspark/pandas/tests/connect/test_parity_categorical.py
index ca880aef57247..9e070a0494b9a 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_categorical.py
+++ b/python/pyspark/pandas/tests/connect/test_parity_categorical.py
@@ -16,18 +16,18 @@
 #
 import unittest
 
-from pyspark import pandas as ps
 from pyspark.pandas.tests.test_categorical import CategoricalTestsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
 from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
 
 
 class CategoricalParityTests(
-    CategoricalTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase
+    CategoricalTestsMixin,
+    PandasOnSparkTestUtils,
+    TestUtils,
+    ReusedConnectTestCase,
 ):
-    @property
-    def psdf(self):
-        return ps.from_pandas(self.pdf)
+    pass
 
 
 if __name__ == "__main__":
diff --git a/python/pyspark/pandas/tests/connect/test_parity_extension.py b/python/pyspark/pandas/tests/connect/test_parity_extension.py
index 7413801d3f8f1..53417f6ad005d 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_extension.py
+++ b/python/pyspark/pandas/tests/connect/test_parity_extension.py
@@ -16,26 +16,17 @@
 #
 import unittest
 
-import pandas as pd
-import numpy as np
-
-from pyspark import pandas as ps
 from pyspark.pandas.tests.test_extension import ExtensionTestsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
 from pyspark.testing.pandasutils import PandasOnSparkTestUtils
 
 
-class ExtensionParityTests(ExtensionTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase):
-    @property
-    def pdf(self):
-        return pd.DataFrame(
-            {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 0]},
-            index=np.random.rand(9),
-        )
-
-    @property
-    def psdf(self):
-        return ps.from_pandas(self.pdf)
+class ExtensionParityTests(
+    ExtensionTestsMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    pass
 
 
 if __name__ == "__main__":
diff --git a/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py b/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py
index 6cc0a277718c2..1b713d814e5d7 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py
+++ b/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py
@@ -16,25 +16,17 @@
 #
 import unittest
 
-import pandas as pd
-
-from pyspark import pandas as ps
 from pyspark.pandas.tests.test_numpy_compat import NumPyCompatTestsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
 from pyspark.testing.pandasutils import PandasOnSparkTestUtils
 
 
-class NumPyCompatParityTests(NumPyCompatTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase):
-    @property
-    def pdf(self):
-        return pd.DataFrame(
-            {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 0]},
-            index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
-        )
-
-    @property
-    def psdf(self):
-        return ps.from_pandas(self.pdf)
+class NumPyCompatParityTests(
+    NumPyCompatTestsMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    pass
 
 
 if __name__ == "__main__":
diff --git a/python/pyspark/pandas/tests/test_categorical.py b/python/pyspark/pandas/tests/test_categorical.py
index ec02f59859b88..2730723b27e35 100644
--- a/python/pyspark/pandas/tests/test_categorical.py
+++ b/python/pyspark/pandas/tests/test_categorical.py
@@ -20,7 +20,7 @@
 from pandas.api.types import CategoricalDtype
 
 import pyspark.pandas as ps
-from pyspark.testing.pandasutils import ComparisonTestBase, TestUtils
+from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils
 
 
 class CategoricalTestsMixin:
@@ -35,6 +35,10 @@ def pdf(self):
             },
         )
 
+    @property
+    def psdf(self):
+        return ps.from_pandas(self.pdf)
+
     @property
     def df_pair(self):
         return self.pdf, self.psdf
@@ -658,7 +662,11 @@ def test_set_categories(self):
         )
 
 
-class CategoricalTests(CategoricalTestsMixin, ComparisonTestBase, TestUtils):
+class CategoricalTests(
+    CategoricalTestsMixin,
+    PandasOnSparkTestCase,
+    TestUtils,
+):
     pass
 
 
diff --git a/python/pyspark/pandas/tests/test_extension.py b/python/pyspark/pandas/tests/test_extension.py
index fba850cb120a5..817046c3b7ead 100644
--- a/python/pyspark/pandas/tests/test_extension.py
+++ b/python/pyspark/pandas/tests/test_extension.py
@@ -21,7 +21,7 @@
 import pandas as pd
 
 from pyspark import pandas as ps
-from pyspark.testing.pandasutils import assert_produces_warning, ComparisonTestBase
+from pyspark.testing.pandasutils import assert_produces_warning, PandasOnSparkTestCase
 from pyspark.pandas.extensions import (
     register_dataframe_accessor,
     register_series_accessor,
@@ -74,6 +74,10 @@ def pdf(self):
             index=np.random.rand(9),
         )
 
+    @property
+    def psdf(self):
+        return ps.from_pandas(self.pdf)
+
     @property
     def accessor(self):
         return CustomAccessor(self.psdf)
@@ -135,7 +139,10 @@ def __init__(self, data):
                 ps.Series([1, 2], dtype=object).bad
 
 
-class ExtensionTests(ExtensionTestsMixin, ComparisonTestBase):
+class ExtensionTests(
+    ExtensionTestsMixin,
+    PandasOnSparkTestCase,
+):
     pass
 
 
diff --git a/python/pyspark/pandas/tests/test_numpy_compat.py b/python/pyspark/pandas/tests/test_numpy_compat.py
index 931e5475c36fa..50115a9f03169 100644
--- a/python/pyspark/pandas/tests/test_numpy_compat.py
+++ b/python/pyspark/pandas/tests/test_numpy_compat.py
@@ -20,7 +20,7 @@
 
 from pyspark import pandas as ps
 from pyspark.pandas import set_option, reset_option
-from pyspark.testing.pandasutils import ComparisonTestBase
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
 from pyspark.testing.sqlutils import SQLTestUtils
 
 
@@ -46,6 +46,10 @@ def pdf(self):
             index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
         )
 
+    @property
+    def psdf(self):
+        return ps.from_pandas(self.pdf)
+
     def test_np_add_series(self):
         psdf = self.psdf
         pdf = self.pdf
@@ -178,7 +182,11 @@ def test_np_spark_compat_frame(self):
             reset_option("compute.ops_on_diff_frames")
 
 
-class NumPyCompatTests(NumPyCompatTestsMixin, ComparisonTestBase, SQLTestUtils):
+class NumPyCompatTests(
+    NumPyCompatTestsMixin,
+    PandasOnSparkTestCase,
+    SQLTestUtils,
+):
     pass