Skip to content

Commit d2cdbd4

Browse files
authored
Don't put GeoSeries in map_partitions kwarg (#205)
* Workaround for the fact that we can't include a partitioned _Frame in map_partitions kwargs without it being concatenated on a single worker. This is an upstream bug dask/dask#8308, but the fix is challenging. * Check for dd.core._Frame * Add test * Address code review
1 parent 91b5de7 commit d2cdbd4

File tree

2 files changed

+15
-0
lines changed

2 files changed

+15
-0
lines changed

dask_geopandas/core.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,15 @@ def from_dask_dataframe(df, geometry=None):
778778
the values will be set as 'geometry' column on the GeoDataFrame.
779779
780780
"""
781+
# If the geometry column is already a partitioned `_Frame`, we can't refer to
782+
# it via a keyword-argument due to https://github.com/dask/dask/issues/8308.
783+
# Instead, we assign the geometry column using regular dataframe operations,
784+
# then refer to that column by name in `map_partitions`.
785+
if isinstance(geometry, dd.core.Series):
786+
name = geometry.name if geometry.name is not None else "geometry"
787+
return df.assign(**{name: geometry}).map_partitions(
788+
geopandas.GeoDataFrame, geometry=name
789+
)
781790
return df.map_partitions(geopandas.GeoDataFrame, geometry=geometry)
782791

783792

dask_geopandas/tests/test_core.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,12 @@ def test_from_dask_dataframe_with_dask_geoseries():
390390
dask_obj = dask_geopandas.from_dask_dataframe(
391391
dask_obj, geometry=dask_geopandas.points_from_xy(dask_obj, "x", "y")
392392
)
393+
# Check that the geometry isn't concatenated and embedded a second time in
394+
# the high-level graph. cf. https://github.com/geopandas/dask-geopandas/issues/197
395+
k = next(k for k in dask_obj.dask.dependencies if k.startswith("GeoDataFrame"))
396+
deps = dask_obj.dask.dependencies[k]
397+
assert len(deps) == 1
398+
393399
expected = df.set_geometry(geopandas.points_from_xy(df["x"], df["y"]))
394400
assert_geoseries_equal(dask_obj.geometry.compute(), expected.geometry)
395401

0 commit comments

Comments
 (0)