From e95e820ac137b250f234080b4ece13fb66477295 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 31 Jan 2024 16:08:14 +0800 Subject: [PATCH] [SPARK-46932] Clean up the imports in `pyspark.pandas.test_*` ### What changes were proposed in this pull request? 1. Remove unused imports and variables. 2. Avoid duplicate definitions of variables. ### Why are the changes needed? Code cleanup. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44967 from zhengruifeng/ps_test_cleanup. Authored-by: Ruifeng Zheng Signed-off-by: Ruifeng Zheng --- .../tests/connect/test_parity_categorical.py | 10 ++++----- .../tests/connect/test_parity_extension.py | 21 ++++++------------- .../tests/connect/test_parity_numpy_compat.py | 20 ++++++------------ .../pyspark/pandas/tests/test_categorical.py | 12 +++++++++-- python/pyspark/pandas/tests/test_extension.py | 11 ++++++++-- .../pyspark/pandas/tests/test_numpy_compat.py | 12 +++++++++-- 6 files changed, 46 insertions(+), 40 deletions(-) diff --git a/python/pyspark/pandas/tests/connect/test_parity_categorical.py b/python/pyspark/pandas/tests/connect/test_parity_categorical.py index ca880aef57247..9e070a0494b9a 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_categorical.py +++ b/python/pyspark/pandas/tests/connect/test_parity_categorical.py @@ -16,18 +16,18 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.test_categorical import CategoricalTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class CategoricalParityTests( - CategoricalTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase + CategoricalTestsMixin, + PandasOnSparkTestUtils, + TestUtils, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": 
diff --git a/python/pyspark/pandas/tests/connect/test_parity_extension.py b/python/pyspark/pandas/tests/connect/test_parity_extension.py index 7413801d3f8f1..53417f6ad005d 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_extension.py +++ b/python/pyspark/pandas/tests/connect/test_parity_extension.py @@ -16,26 +16,17 @@ # import unittest -import pandas as pd -import numpy as np - -from pyspark import pandas as ps from pyspark.pandas.tests.test_extension import ExtensionTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase from pyspark.testing.pandasutils import PandasOnSparkTestUtils -class ExtensionParityTests(ExtensionTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase): - @property - def pdf(self): - return pd.DataFrame( - {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 0]}, - index=np.random.rand(9), - ) - - @property - def psdf(self): - return ps.from_pandas(self.pdf) +class ExtensionParityTests( + ExtensionTestsMixin, + PandasOnSparkTestUtils, + ReusedConnectTestCase, +): + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py b/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py index 6cc0a277718c2..1b713d814e5d7 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +++ b/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py @@ -16,25 +16,17 @@ # import unittest -import pandas as pd - -from pyspark import pandas as ps from pyspark.pandas.tests.test_numpy_compat import NumPyCompatTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase from pyspark.testing.pandasutils import PandasOnSparkTestUtils -class NumPyCompatParityTests(NumPyCompatTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase): - @property - def pdf(self): - return pd.DataFrame( - {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 0]}, - index=[0, 1, 3, 5, 6, 8, 9, 9, 9], - ) - - @property - def psdf(self): - 
return ps.from_pandas(self.pdf) +class NumPyCompatParityTests( + NumPyCompatTestsMixin, + PandasOnSparkTestUtils, + ReusedConnectTestCase, +): + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/test_categorical.py b/python/pyspark/pandas/tests/test_categorical.py index ec02f59859b88..2730723b27e35 100644 --- a/python/pyspark/pandas/tests/test_categorical.py +++ b/python/pyspark/pandas/tests/test_categorical.py @@ -20,7 +20,7 @@ from pandas.api.types import CategoricalDtype import pyspark.pandas as ps -from pyspark.testing.pandasutils import ComparisonTestBase, TestUtils +from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils class CategoricalTestsMixin: @@ -35,6 +35,10 @@ def pdf(self): }, ) + @property + def psdf(self): + return ps.from_pandas(self.pdf) + @property def df_pair(self): return self.pdf, self.psdf @@ -658,7 +662,11 @@ def test_set_categories(self): ) -class CategoricalTests(CategoricalTestsMixin, ComparisonTestBase, TestUtils): +class CategoricalTests( + CategoricalTestsMixin, + PandasOnSparkTestCase, + TestUtils, +): pass diff --git a/python/pyspark/pandas/tests/test_extension.py b/python/pyspark/pandas/tests/test_extension.py index fba850cb120a5..817046c3b7ead 100644 --- a/python/pyspark/pandas/tests/test_extension.py +++ b/python/pyspark/pandas/tests/test_extension.py @@ -21,7 +21,7 @@ import pandas as pd from pyspark import pandas as ps -from pyspark.testing.pandasutils import assert_produces_warning, ComparisonTestBase +from pyspark.testing.pandasutils import assert_produces_warning, PandasOnSparkTestCase from pyspark.pandas.extensions import ( register_dataframe_accessor, register_series_accessor, @@ -74,6 +74,10 @@ def pdf(self): index=np.random.rand(9), ) + @property + def psdf(self): + return ps.from_pandas(self.pdf) + @property def accessor(self): return CustomAccessor(self.psdf) @@ -135,7 +139,10 @@ def __init__(self, data): ps.Series([1, 2], dtype=object).bad -class 
ExtensionTests(ExtensionTestsMixin, ComparisonTestBase): +class ExtensionTests( + ExtensionTestsMixin, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/test_numpy_compat.py b/python/pyspark/pandas/tests/test_numpy_compat.py index 931e5475c36fa..50115a9f03169 100644 --- a/python/pyspark/pandas/tests/test_numpy_compat.py +++ b/python/pyspark/pandas/tests/test_numpy_compat.py @@ -20,7 +20,7 @@ from pyspark import pandas as ps from pyspark.pandas import set_option, reset_option -from pyspark.testing.pandasutils import ComparisonTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.testing.sqlutils import SQLTestUtils @@ -46,6 +46,10 @@ def pdf(self): index=[0, 1, 3, 5, 6, 8, 9, 9, 9], ) + @property + def psdf(self): + return ps.from_pandas(self.pdf) + def test_np_add_series(self): psdf = self.psdf pdf = self.pdf @@ -178,7 +182,11 @@ def test_np_spark_compat_frame(self): reset_option("compute.ops_on_diff_frames") -class NumPyCompatTests(NumPyCompatTestsMixin, ComparisonTestBase, SQLTestUtils): +class NumPyCompatTests( + NumPyCompatTestsMixin, + PandasOnSparkTestCase, + SQLTestUtils, +): pass