Skip to content

Commit

Permalink
[SPARK-46726][PS][TESTS] Rebalance pyspark_pandas_connect_part?
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Rebalance `pyspark_pandas_connect_part?`

### Why are the changes needed?
To improve testing parallelism: the `pyspark_pandas_connect_part?` jobs currently have uneven run times.

before: https://github.com/apache/spark/actions/runs/7527560858/job/20487999563
`pyspark_pandas_connect_part0`: `Tests passed in 3979 seconds`
`pyspark_pandas_connect_part1`: `Tests passed in 3585 seconds`
`pyspark_pandas_connect_part2`: `Tests passed in 2724 seconds`
`pyspark_pandas_connect_part3`: `Tests passed in 3276 seconds`

The difference between the slowest and fastest part is about 20 min (3979 s vs. 2724 s).

after:
`pyspark_pandas_connect_part0`: `Tests passed in 3516 seconds`
`pyspark_pandas_connect_part1`: `Tests passed in 3228 seconds`
`pyspark_pandas_connect_part2`: `Tests passed in 3760 seconds`
`pyspark_pandas_connect_part3`: `Tests passed in 3195 seconds`

The difference between the slowest and fastest part is about 9 min (3760 s vs. 3195 s).

### Does this PR introduce _any_ user-facing change?
No, this is a test-only change.

### How was this patch tested?
CI: https://github.com/zhengruifeng/spark/actions/runs/7527236548/job/20488637410

### Was this patch authored or co-authored using generative AI tooling?
no

Closes #44741 from zhengruifeng/ps_test_rebalance_pandas_connect.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
  • Loading branch information
zhengruifeng committed Jan 15, 2024
1 parent f3ffd3c commit b095960
Showing 1 changed file with 43 additions and 43 deletions.
86 changes: 43 additions & 43 deletions dev/sparktestsupport/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,21 @@ def __hash__(self):
],
python_test_goals=[
# pandas-on-Spark unittests
"pyspark.pandas.tests.connect.test_parity_categorical",
"pyspark.pandas.tests.connect.test_parity_config",
"pyspark.pandas.tests.connect.test_parity_extension",
"pyspark.pandas.tests.connect.test_parity_frame_spark",
"pyspark.pandas.tests.connect.test_parity_generic_functions",
"pyspark.pandas.tests.connect.test_parity_indexops_spark",
"pyspark.pandas.tests.connect.test_parity_internal",
"pyspark.pandas.tests.connect.test_parity_namespace",
"pyspark.pandas.tests.connect.test_parity_numpy_compat",
"pyspark.pandas.tests.connect.test_parity_repr",
"pyspark.pandas.tests.connect.test_parity_scalars",
"pyspark.pandas.tests.connect.test_parity_spark_functions",
"pyspark.pandas.tests.connect.test_parity_sql",
"pyspark.pandas.tests.connect.test_parity_typedef",
"pyspark.pandas.tests.connect.test_parity_utils",
"pyspark.pandas.tests.connect.data_type_ops.test_parity_as_type",
"pyspark.pandas.tests.connect.data_type_ops.test_parity_base",
"pyspark.pandas.tests.connect.data_type_ops.test_parity_binary_ops",
Expand All @@ -1073,37 +1088,15 @@ def __hash__(self):
"pyspark.pandas.tests.connect.data_type_ops.test_parity_string_ops",
"pyspark.pandas.tests.connect.data_type_ops.test_parity_udt_ops",
"pyspark.pandas.tests.connect.data_type_ops.test_parity_timedelta_ops",
"pyspark.pandas.tests.connect.indexes.test_parity_category",
"pyspark.pandas.tests.connect.indexes.test_parity_timedelta",
"pyspark.pandas.tests.connect.plot.test_parity_frame_plot",
"pyspark.pandas.tests.connect.plot.test_parity_frame_plot_matplotlib",
"pyspark.pandas.tests.connect.plot.test_parity_frame_plot_plotly",
"pyspark.pandas.tests.connect.plot.test_parity_series_plot",
"pyspark.pandas.tests.connect.plot.test_parity_series_plot_matplotlib",
"pyspark.pandas.tests.connect.plot.test_parity_series_plot_plotly",
"pyspark.pandas.tests.connect.test_parity_categorical",
"pyspark.pandas.tests.connect.test_parity_config",
"pyspark.pandas.tests.connect.indexes.test_parity_default",
"pyspark.pandas.tests.connect.test_parity_extension",
"pyspark.pandas.tests.connect.test_parity_frame_spark",
"pyspark.pandas.tests.connect.test_parity_generic_functions",
"pyspark.pandas.tests.connect.test_parity_indexops_spark",
"pyspark.pandas.tests.connect.test_parity_internal",
"pyspark.pandas.tests.connect.test_parity_namespace",
"pyspark.pandas.tests.connect.test_parity_numpy_compat",
"pyspark.pandas.tests.connect.test_parity_repr",
"pyspark.pandas.tests.connect.resample.test_parity_error",
"pyspark.pandas.tests.connect.resample.test_parity_missing",
"pyspark.pandas.tests.connect.resample.test_parity_on",
"pyspark.pandas.tests.connect.resample.test_parity_timezone",
"pyspark.pandas.tests.connect.test_parity_scalars",
"pyspark.pandas.tests.connect.series.test_parity_datetime",
"pyspark.pandas.tests.connect.series.test_parity_string_ops_adv",
"pyspark.pandas.tests.connect.series.test_parity_string_ops_basic",
"pyspark.pandas.tests.connect.test_parity_spark_functions",
"pyspark.pandas.tests.connect.test_parity_sql",
"pyspark.pandas.tests.connect.test_parity_typedef",
"pyspark.pandas.tests.connect.test_parity_utils",
"pyspark.pandas.tests.connect.indexes.test_parity_category",
"pyspark.pandas.tests.connect.indexes.test_parity_timedelta",
"pyspark.pandas.tests.connect.indexes.test_parity_basic",
"pyspark.pandas.tests.connect.indexes.test_parity_getattr",
"pyspark.pandas.tests.connect.indexes.test_parity_name",
Expand Down Expand Up @@ -1146,11 +1139,6 @@ def __hash__(self):
"pyspark.pandas.tests.connect.computation.test_parity_describe",
"pyspark.pandas.tests.connect.computation.test_parity_eval",
"pyspark.pandas.tests.connect.computation.test_parity_melt",
"pyspark.pandas.tests.connect.frame.test_parity_attrs",
"pyspark.pandas.tests.connect.frame.test_parity_axis",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_dot_frame",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_dot_series",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_error",
],
excluded_python_implementations=[
"PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
Expand All @@ -1166,6 +1154,8 @@ def __hash__(self):
],
python_test_goals=[
# pandas-on-Spark unittests
"pyspark.pandas.tests.connect.frame.test_parity_attrs",
"pyspark.pandas.tests.connect.frame.test_parity_axis",
"pyspark.pandas.tests.connect.frame.test_parity_constructor",
"pyspark.pandas.tests.connect.frame.test_parity_conversion",
"pyspark.pandas.tests.connect.frame.test_parity_reindexing",
Expand All @@ -1183,14 +1173,9 @@ def __hash__(self):
"pyspark.pandas.tests.connect.groupby.test_parity_split_apply_adv",
"pyspark.pandas.tests.connect.groupby.test_parity_split_apply_basic",
"pyspark.pandas.tests.connect.groupby.test_parity_split_apply_min_max",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_align",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_basic_slow",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_cov",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_corrwith",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_index",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_series",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_setitem_frame",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_setitem_series",
"pyspark.pandas.tests.connect.series.test_parity_datetime",
"pyspark.pandas.tests.connect.series.test_parity_string_ops_adv",
"pyspark.pandas.tests.connect.series.test_parity_string_ops_basic",
"pyspark.pandas.tests.connect.series.test_parity_all_any",
"pyspark.pandas.tests.connect.series.test_parity_arg_ops",
"pyspark.pandas.tests.connect.series.test_parity_as_of",
Expand All @@ -1203,6 +1188,7 @@ def __hash__(self):
"pyspark.pandas.tests.connect.series.test_parity_series",
"pyspark.pandas.tests.connect.series.test_parity_sort",
"pyspark.pandas.tests.connect.series.test_parity_stat",
"pyspark.pandas.tests.connect.series.test_parity_interpolate",
"pyspark.pandas.tests.connect.data_type_ops.test_parity_num_arithmetic",
"pyspark.pandas.tests.connect.data_type_ops.test_parity_num_mod",
"pyspark.pandas.tests.connect.data_type_ops.test_parity_num_mul_div",
Expand Down Expand Up @@ -1235,15 +1221,15 @@ def __hash__(self):
"pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx",
"pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx_adv",
"pyspark.pandas.tests.connect.computation.test_parity_stats",
"pyspark.pandas.tests.connect.indexes.test_parity_append",
"pyspark.pandas.tests.connect.indexes.test_parity_intersection",
"pyspark.pandas.tests.connect.indexes.test_parity_monotonic",
"pyspark.pandas.tests.connect.indexes.test_parity_union",
"pyspark.pandas.tests.connect.computation.test_parity_missing_data",
"pyspark.pandas.tests.connect.frame.test_parity_interpolate",
"pyspark.pandas.tests.connect.frame.test_parity_interpolate_error",
"pyspark.pandas.tests.connect.series.test_parity_interpolate",
"pyspark.pandas.tests.connect.resample.test_parity_frame",
"pyspark.pandas.tests.connect.resample.test_parity_series",
"pyspark.pandas.tests.connect.resample.test_parity_error",
"pyspark.pandas.tests.connect.resample.test_parity_missing",
"pyspark.pandas.tests.connect.resample.test_parity_on",
"pyspark.pandas.tests.connect.resample.test_parity_timezone",
"pyspark.pandas.tests.connect.window.test_parity_ewm_error",
"pyspark.pandas.tests.connect.window.test_parity_ewm_mean",
"pyspark.pandas.tests.connect.window.test_parity_groupby_ewm_mean",
Expand All @@ -1263,7 +1249,17 @@ def __hash__(self):
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_rolling",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_rolling_adv",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_rolling_count",
"pyspark.pandas.tests.connect.computation.test_parity_missing_data",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_dot_frame",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_dot_series",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_error",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_align",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_basic_slow",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_cov",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_corrwith",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_index",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_series",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_setitem_frame",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_setitem_series",
"pyspark.pandas.tests.connect.groupby.test_parity_index",
"pyspark.pandas.tests.connect.groupby.test_parity_describe",
"pyspark.pandas.tests.connect.groupby.test_parity_head_tail",
Expand Down Expand Up @@ -1301,6 +1297,10 @@ def __hash__(self):
"pyspark.pandas.tests.connect.groupby.test_parity_stat_ddof",
"pyspark.pandas.tests.connect.groupby.test_parity_stat_func",
"pyspark.pandas.tests.connect.groupby.test_parity_stat_prod",
"pyspark.pandas.tests.connect.indexes.test_parity_append",
"pyspark.pandas.tests.connect.indexes.test_parity_intersection",
"pyspark.pandas.tests.connect.indexes.test_parity_monotonic",
"pyspark.pandas.tests.connect.indexes.test_parity_union",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_at",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_between",
Expand Down

0 comments on commit b095960

Please sign in to comment.