From 4a0de07fd395ce8c4e279276597f07268c2de7e4 Mon Sep 17 00:00:00 2001
From: Devin Petersohn
Date: Fri, 19 Dec 2025 16:01:31 -0600
Subject: [PATCH 1/3] [SPARK-54787][PS] Use list comprehension in pandas _bool_column_labels

Signed-off-by: Devin Petersohn
---
 python/pyspark/pandas/frame.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index e5aaecbb64fd..4d34c51bc94d 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -11262,15 +11262,13 @@ def _bool_column_labels(self, column_labels: List[Label]) -> List[Label]:
         """
         Filter column labels of boolean columns (without None).
         """
-        bool_column_labels = []
-        for label in column_labels:
-            psser = self._psser_for(label)
-            if is_bool_dtype(psser):
-                # Rely on dtype rather than spark type because
-                # columns that consist of bools and Nones should be excluded
-                # if bool_only is True
-                bool_column_labels.append(label)
-        return bool_column_labels
+        # Rely on dtype rather than spark type because
+        # columns that consist of bools and Nones should be excluded
+        # if bool_only is True
+        return [
+            label for label in column_labels
+            if is_bool_dtype(self._psser_for(label))
+        ]
 
     def _result_aggregated(
         self, column_labels: List[Label], scols: Sequence[PySparkColumn]

From be0b28b9496036e97866f4c35a8e310baf0c67ef Mon Sep 17 00:00:00 2001
From: Devin Petersohn
Date: Fri, 19 Dec 2025 16:03:30 -0600
Subject: [PATCH 2/3] Reformat

Signed-off-by: Devin Petersohn
---
 python/pyspark/pandas/frame.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 4d34c51bc94d..81a7fa64e538 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -11265,10 +11265,7 @@ def _bool_column_labels(self, column_labels: List[Label]) -> List[Label]:
         # Rely on dtype rather than spark type because
         # columns that consist of bools and Nones should be excluded
         # if bool_only is True
-        return [
-            label for label in column_labels
-            if is_bool_dtype(self._psser_for(label))
-        ]
+        return [label for label in column_labels if is_bool_dtype(self._psser_for(label))]
 
     def _result_aggregated(
         self, column_labels: List[Label], scols: Sequence[PySparkColumn]

From fd52a4ca8ce252edefa6682e0d65ba75038499fe Mon Sep 17 00:00:00 2001
From: Devin Petersohn
Date: Fri, 19 Dec 2025 16:06:06 -0600
Subject: [PATCH 3/3] Comment

Signed-off-by: Devin Petersohn
---
 python/pyspark/pandas/frame.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 81a7fa64e538..2238d044cefe 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -11262,9 +11262,8 @@ def _bool_column_labels(self, column_labels: List[Label]) -> List[Label]:
         """
         Filter column labels of boolean columns (without None).
         """
-        # Rely on dtype rather than spark type because
-        # columns that consist of bools and Nones should be excluded
-        # if bool_only is True
+        # Rely on dtype rather than spark type because columns that consist of bools and
+        # Nones should be excluded if bool_only is True
         return [label for label in column_labels if is_bool_dtype(self._psser_for(label))]
 
     def _result_aggregated(