Skip to content

Commit d4961e1

Browse files
authored
Fix Literal partitioning in cudf-polars (#19160)
Closes #19147. When a `Select` does not depend on any columns from the child, we must avoid using multiple partitions. Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: #19160
1 parent b49728e commit d4961e1

File tree

2 files changed

+19
-1
lines changed

2 files changed

+19
-1
lines changed

python/cudf_polars/cudf_polars/experimental/select.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
import pylibcudf as plc
1010

1111
from cudf_polars.dsl import expr
12-
from cudf_polars.dsl.ir import HConcat, Scan, Select, Union
12+
from cudf_polars.dsl.expr import Col, Len
13+
from cudf_polars.dsl.ir import Empty, HConcat, Scan, Select, Union
1314
from cudf_polars.dsl.traversal import traversal
1415
from cudf_polars.experimental.base import PartitionInfo
1516
from cudf_polars.experimental.dispatch import lower_ir_node
@@ -116,6 +117,7 @@ def _(
116117
and isinstance(child.children[0], Scan)
117118
and child.children[0].predicate is None
118119
):
120+
# Special Case: Fast count.
119121
scan = child.children[0]
120122
count = scan.fast_count()
121123
dtype = ir.exprs[0].value.dtype
@@ -135,9 +137,18 @@ def _(
135137
partition_info[new_node] = PartitionInfo(count=1)
136138
return new_node, partition_info
137139

140+
if not any(
141+
isinstance(expr, (Col, Len)) for expr in traversal([e.value for e in ir.exprs])
142+
):
143+
# Special Case: Selection does not depend on any columns.
144+
new_node = ir.reconstruct([input_ir := Empty()])
145+
partition_info[input_ir] = partition_info[new_node] = PartitionInfo(count=1)
146+
return new_node, partition_info
147+
138148
if pi.count > 1 and not all(
139149
expr.is_pointwise for expr in traversal([e.value for e in ir.exprs])
140150
):
151+
# Special Case: Multiple partitions with 1+ non-pointwise expressions.
141152
try:
142153
# Try decomposing the underlying expressions
143154
return decompose_select(

python/cudf_polars/tests/experimental/test_select.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,10 @@ def test_select_parquet_fast_count(request, tmp_path, df, engine):
124124
df.collect().write_parquet(file)
125125
q = pl.scan_parquet(file).select(pl.len())
126126
assert_gpu_result_equal(q, engine=engine)
127+
128+
129+
def test_select_literal(engine):
130+
# See: https://github.com/rapidsai/cudf/issues/19147
131+
ldf = pl.LazyFrame({"a": list(range(10))})
132+
q = ldf.select(pl.lit(2).pow(pl.lit(-3, dtype=pl.Float32)))
133+
assert_gpu_result_equal(q, engine=engine)

0 commit comments

Comments
 (0)