diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 605175e32d2..262a11ff667 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,9 @@ Deprecations Bug fixes ~~~~~~~~~ +- Fix the ``align_chunks`` parameter of :py:meth:`~xarray.Dataset.to_zarr`, which was not being + passed to the underlying :py:meth:`~xarray.backends.api` method (:issue:`10501`, :pull:`10516`). + - Fix Pydap Datatree backend testing. Testing now compares elements of (unordered) two sets (before, lists) (:pull:`10525`). By `Miguel Jimenez-Urias `_. diff --git a/xarray/backends/chunks.py b/xarray/backends/chunks.py index f17f5375976..1fc2681c3e0 100644 --- a/xarray/backends/chunks.py +++ b/xarray/backends/chunks.py @@ -159,6 +159,13 @@ def grid_rechunk( if not nd_var_chunks: return v + # This is useful for the scenarios where the enc_chunks are bigger than the + # variable chunks, which happens when the user specifies the enc_chunks manually. + enc_chunks = tuple( + min(enc_chunk, sum(var_chunk)) + for enc_chunk, var_chunk in zip(enc_chunks, nd_var_chunks, strict=True) + ) + nd_grid_chunks = tuple( build_grid_chunks( sum(var_chunks), @@ -191,9 +198,9 @@ def validate_grid_chunks_alignment( base_error = ( "Specified Zarr chunks encoding['chunks']={enc_chunks!r} for " "variable named {name!r} would overlap multiple Dask chunks. " - "Check the chunk at position {var_chunk_pos}, which has a size of " - "{var_chunk_size} on dimension {dim_i}. It is unaligned with " - "backend chunks of size {chunk_size} in region {region}. " + "Please check the Dask chunks at position {var_chunk_pos} and " + "{var_chunk_pos_next}, on axis {axis}, they are overlapped " + "on the same Zarr chunk in the region {region}. " "Writing this array in parallel with Dask could lead to corrupted data. " "To resolve this issue, consider one of the following options: " "- Rechunk the array using `chunk()`. 
" @@ -202,7 +209,7 @@ def validate_grid_chunks_alignment( "- Enable automatic chunks alignment with `align_chunks=True`." ) - for dim_i, chunk_size, var_chunks, interval, size in zip( + for axis, chunk_size, var_chunks, interval, size in zip( range(len(enc_chunks)), enc_chunks, nd_var_chunks, @@ -215,9 +222,10 @@ def validate_grid_chunks_alignment( raise ValueError( base_error.format( var_chunk_pos=i + 1, + var_chunk_pos_next=i + 2, var_chunk_size=chunk, + axis=axis, name=name, - dim_i=dim_i, chunk_size=chunk_size, region=interval, enc_chunks=enc_chunks, @@ -237,9 +245,10 @@ def validate_grid_chunks_alignment( raise ValueError( base_error.format( var_chunk_pos=0, + var_chunk_pos_next=0, var_chunk_size=var_chunks[0], + axis=axis, name=name, - dim_i=dim_i, chunk_size=chunk_size, region=interval, enc_chunks=enc_chunks, @@ -251,9 +260,10 @@ def validate_grid_chunks_alignment( error_on_last_chunk = base_error.format( var_chunk_pos=len(var_chunks) - 1, + var_chunk_pos_next=len(var_chunks) - 1, var_chunk_size=var_chunks[-1], + axis=axis, name=name, - dim_i=dim_i, chunk_size=chunk_size, region=interval, enc_chunks=enc_chunks, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0f2dd266129..010ec64f643 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2302,6 +2302,7 @@ def to_zarr( append_dim=append_dim, region=region, safe_chunks=safe_chunks, + align_chunks=align_chunks, zarr_version=zarr_version, zarr_format=zarr_format, write_empty_chunks=write_empty_chunks, diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 6997be200b1..d479aaa0594 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6933,6 +6933,54 @@ def test_zarr_safe_chunk_region(self, mode: Literal["r+", "a"]): chunk = chunk.chunk() self.save(store, chunk.chunk(), region=region) + @requires_dask + def test_dataset_to_zarr_align_chunks_true(self, tmp_store) -> None: + # This test is a replica of the one in 
`test_dataarray_to_zarr_align_chunks_true` + # but for datasets + with self.create_zarr_target() as store: + ds = ( + DataArray( + np.arange(4).reshape((2, 2)), + dims=["a", "b"], + coords={ + "a": np.arange(2), + "b": np.arange(2), + }, + ) + .chunk(a=(1, 1), b=(1, 1)) + .to_dataset(name="foo") + ) + + self.save( + store, + ds, + align_chunks=True, + encoding={"foo": {"chunks": (3, 3)}}, + mode="w", + ) + assert_identical(ds, xr.open_zarr(store)) + + ds = ( + DataArray( + np.arange(4, 8).reshape((2, 2)), + dims=["a", "b"], + coords={ + "a": np.arange(2), + "b": np.arange(2), + }, + ) + .chunk(a=(1, 1), b=(1, 1)) + .to_dataset(name="foo") + ) + + self.save( + store, + ds, + align_chunks=True, + region="auto", + ) + assert_identical(ds, xr.open_zarr(store)) + @requires_h5netcdf @requires_fsspec