[SPARK-46699][PS][TESTS] Split ArithmeticTests

### What changes were proposed in this pull request?
Split `ArithmeticTests`

### Why are the changes needed?
its parity test is slow

### Does this PR introduce _any_ user-facing change?
no, test-only

### How was this patch tested?
ci

### Was this patch authored or co-authored using generative AI tooling?
no

Closes #44708 from zhengruifeng/ps_test_split_num_arithmetic.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
apache · Jan 13, 2024 · 1c9b022 · 1c9b022
1 parent 4ea3742
commit 1c9b022
Show file tree

Hide file tree

Showing 8 changed files with 391 additions and 93 deletions.
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
@@ -710,6 +710,9 @@ def __hash__(self):
         "pyspark.pandas.tests.data_type_ops.test_null_ops",
         "pyspark.pandas.tests.data_type_ops.test_num_ops",
         "pyspark.pandas.tests.data_type_ops.test_num_arithmetic",
+        "pyspark.pandas.tests.data_type_ops.test_num_mod",
+        "pyspark.pandas.tests.data_type_ops.test_num_mul_div",
+        "pyspark.pandas.tests.data_type_ops.test_num_pow",
         "pyspark.pandas.tests.data_type_ops.test_num_reverse",
         "pyspark.pandas.tests.data_type_ops.test_string_ops",
         "pyspark.pandas.tests.data_type_ops.test_udt_ops",
@@ -1201,6 +1204,9 @@ def __hash__(self):
         "pyspark.pandas.tests.connect.series.test_parity_sort",
         "pyspark.pandas.tests.connect.series.test_parity_stat",
         "pyspark.pandas.tests.connect.data_type_ops.test_parity_num_arithmetic",
+        "pyspark.pandas.tests.connect.data_type_ops.test_parity_num_mod",
+        "pyspark.pandas.tests.connect.data_type_ops.test_parity_num_mul_div",
+        "pyspark.pandas.tests.connect.data_type_ops.test_parity_num_pow",
         "pyspark.pandas.tests.connect.reshape.test_parity_get_dummies",
         "pyspark.pandas.tests.connect.reshape.test_parity_get_dummies_kwargs",
         "pyspark.pandas.tests.connect.reshape.test_parity_get_dummies_multiindex",

diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_mod.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_mod.py
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.data_type_ops.test_num_mod import NumModTestsMixin
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+from pyspark.testing.connectutils import ReusedConnectTestCase
+
+
+class NumModParityTests(
+    NumModTestsMixin,
+    PandasOnSparkTestUtils,
+    OpsTestBase,
+    ReusedConnectTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.data_type_ops.test_parity_num_mod import *  # noqa
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_mul_div.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_mul_div.py
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.data_type_ops.test_num_mul_div import NumMulDivTestsMixin
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+from pyspark.testing.connectutils import ReusedConnectTestCase
+
+
+class NumMulDivParityTests(
+    NumMulDivTestsMixin,
+    PandasOnSparkTestUtils,
+    OpsTestBase,
+    ReusedConnectTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.data_type_ops.test_parity_num_mul_div import *  # noqa
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_pow.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_pow.py
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.data_type_ops.test_num_pow import NumPowTestsMixin
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+from pyspark.testing.connectutils import ReusedConnectTestCase
+
+
+class NumPowParityTests(
+    NumPowTestsMixin,
+    PandasOnSparkTestUtils,
+    OpsTestBase,
+    ReusedConnectTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.data_type_ops.test_parity_num_pow import *  # noqa
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py b/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py
@@ -18,7 +18,6 @@
 import unittest
 
 import pandas as pd
-import numpy as np
 
 from pyspark import pandas as ps
 from pyspark.testing.pandasutils import PandasOnSparkTestCase
@@ -75,98 +74,6 @@ def test_sub(self):
                 else:
                     self.assertRaises(TypeError, lambda: psser - psdf[n_col])
 
-    def test_mul(self):
-        pdf, psdf = self.pdf, self.psdf
-        for col in self.numeric_df_cols:
-            pser, psser = pdf[col], psdf[col]
-            self.assert_eq(pser * pser, psser * psser)
-            self.assert_eq(pser * pser.astype(bool), psser * psser.astype(bool))
-            self.assert_eq(pser * True, psser * True)
-            self.assert_eq(pser * False, psser * False)
-
-            if psser.dtype in [int, np.int32]:
-                self.assert_eq(pser * pdf["string"], psser * psdf["string"])
-            else:
-                self.assertRaises(TypeError, lambda: psser * psdf["string"])
-
-            self.assert_eq(pser * pdf["bool"], psser * psdf["bool"])
-
-            self.assertRaises(TypeError, lambda: psser * psdf["datetime"])
-            self.assertRaises(TypeError, lambda: psser * psdf["date"])
-            self.assertRaises(TypeError, lambda: psser * psdf["categorical"])
-
-    def test_truediv(self):
-        pdf, psdf = self.pdf, self.psdf
-        for col in self.numeric_df_cols:
-            pser, psser = pdf[col], psdf[col]
-            if psser.dtype in [float, int, np.int32]:
-                self.assert_eq(pser / pser, psser / psser)
-                self.assert_eq(pser / pser.astype(bool), psser / psser.astype(bool))
-                self.assert_eq(pser / True, psser / True)
-                self.assert_eq(pser / False, psser / False)
-
-            for n_col in self.non_numeric_df_cols:
-                if n_col == "bool":
-                    self.assert_eq(pdf["float"] / pdf[n_col], psdf["float"] / psdf[n_col])
-                else:
-                    self.assertRaises(TypeError, lambda: psser / psdf[n_col])
-
-    def test_floordiv(self):
-        pdf, psdf = self.pdf, self.psdf
-        pser, psser = pdf["float"], psdf["float"]
-        self.assert_eq(pser // pser, psser // psser)
-        self.assert_eq(pser // pser.astype(bool), psser // psser.astype(bool))
-        self.assert_eq(pser // True, psser // True)
-        self.assert_eq(pser // False, psser // False)
-
-        for n_col in self.non_numeric_df_cols:
-            if n_col == "bool":
-                self.assert_eq(pdf["float"] // pdf["bool"], psdf["float"] // psdf["bool"])
-            else:
-                for col in self.numeric_df_cols:
-                    psser = psdf[col]
-                    self.assertRaises(TypeError, lambda: psser // psdf[n_col])
-
-    def test_mod(self):
-        pdf, psdf = self.pdf, self.psdf
-        for col in self.numeric_df_cols:
-            pser, psser = pdf[col], psdf[col]
-            self.assert_eq(pser % pser, psser % psser)
-            self.assert_eq(pser % pser.astype(bool), psser % psser.astype(bool))
-            self.assert_eq(pser % True, psser % True)
-            if col in ["int", "int32"]:
-                self.assert_eq(
-                    pd.Series([np.nan, np.nan, np.nan], dtype=float, name=col), psser % False
-                )
-            else:
-                self.assert_eq(
-                    pd.Series([np.nan, np.nan, np.nan], dtype=pser.dtype, name=col), psser % False
-                )
-
-            for n_col in self.non_numeric_df_cols:
-                if n_col == "bool":
-                    self.assert_eq(pdf["float"] % pdf[n_col], psdf["float"] % psdf[n_col])
-                else:
-                    self.assertRaises(TypeError, lambda: psser % psdf[n_col])
-
-    def test_pow(self):
-        pdf, psdf = self.pdf, self.psdf
-        for col in self.numeric_df_cols:
-            pser, psser = pdf[col], psdf[col]
-            if col in ["float", "float_w_nan"]:
-                self.assert_eq(pser**pser, psser**psser)
-                self.assert_eq(pser ** pser.astype(bool), psser ** psser.astype(bool))
-                self.assert_eq(pser**True, psser**True)
-                self.assert_eq(pser**False, psser**False)
-                self.assert_eq(pser**1, psser**1)
-                self.assert_eq(pser**0, psser**0)
-
-            for n_col in self.non_numeric_df_cols:
-                if n_col == "bool":
-                    self.assert_eq(pdf["float"] ** pdf[n_col], psdf["float"] ** psdf[n_col])
-                else:
-                    self.assertRaises(TypeError, lambda: psser ** psdf[n_col])
-
 
 class ArithmeticTests(
     ArithmeticTestsMixin,

diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_mod.py b/python/pyspark/pandas/tests/data_type_ops/test_num_mod.py
@@ -0,0 +1,77 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+import pandas as pd
+import numpy as np
+
+from pyspark import pandas as ps
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
+
+
+class NumModTestsMixin:
+    @property
+    def float_pser(self):
+        return pd.Series([1, 2, 3], dtype=float)
+
+    @property
+    def float_psser(self):
+        return ps.from_pandas(self.float_pser)
+
+    def test_mod(self):
+        pdf, psdf = self.pdf, self.psdf
+        for col in self.numeric_df_cols:
+            pser, psser = pdf[col], psdf[col]
+            self.assert_eq(pser % pser, psser % psser)
+            self.assert_eq(pser % pser.astype(bool), psser % psser.astype(bool))
+            self.assert_eq(pser % True, psser % True)
+            if col in ["int", "int32"]:
+                self.assert_eq(
+                    pd.Series([np.nan, np.nan, np.nan], dtype=float, name=col), psser % False
+                )
+            else:
+                self.assert_eq(
+                    pd.Series([np.nan, np.nan, np.nan], dtype=pser.dtype, name=col), psser % False
+                )
+
+            for n_col in self.non_numeric_df_cols:
+                if n_col == "bool":
+                    self.assert_eq(pdf["float"] % pdf[n_col], psdf["float"] % psdf[n_col])
+                else:
+                    self.assertRaises(TypeError, lambda: psser % psdf[n_col])
+
+
+class NumModTests(
+    NumModTestsMixin,
+    OpsTestBase,
+    PandasOnSparkTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.data_type_ops.test_num_mod import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)