From c93a644d3c8ef1be0e149e0e5e0bfae0bc539230 Mon Sep 17 00:00:00 2001 From: Asish Kumar Date: Thu, 9 Apr 2026 20:05:58 +0000 Subject: [PATCH 1/2] Restore -1 Zarr chunk sentinel handling Translate xarray's documented -1 chunk sentinel into explicit full-dimension chunk sizes before passing chunk metadata to zarr, and cover the regression with a roundtrip test. --- doc/whats-new.rst | 3 +++ xarray/backends/zarr.py | 11 ++++++++++- xarray/tests/test_backends.py | 7 +++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 66bd461157e..c4a7db21208 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -120,6 +120,9 @@ Bug Fixes By `Emmanuel Ferdman `_. - :func:`combine_by_coords` no longer returns an empty dataset when a generator is passed as ``data_objects`` (:issue:`10114`, :pull:`11265`). By `Amartya Anand `_. +- Restore support for ``-1`` chunk sizes in Zarr encoding, mapping them to the + full length of each written dimension (:issue:`11288`). + By `Sarthak `_. - Fix h5netcdf backend module detection and ros3 tests (:issue:`11243`, :pull:`11274`). By `Kai Mühlbauer `_. diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d9279dc2de9..8f8280f1130 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -333,7 +333,7 @@ async def async_getitem(self, key): ) -def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): +def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, shape): """ Given encoding chunks (possibly None or []) and variable chunks (possibly None or []). @@ -389,6 +389,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): var_chunks, ndim, name, + shape, ) for x in enc_chunks_tuple: @@ -400,6 +401,13 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): f"for variable named {name!r}." ) + # Preserve xarray's documented convention that -1 means the full length + # of a dimension when encoding chunk sizes for zarr. + enc_chunks_tuple = tuple( + dim_size if chunk == -1 else chunk + for chunk, dim_size in zip(enc_chunks_tuple, shape, strict=True) + ) + # if there are chunks in encoding and the variable data is a numpy array, # we use the specified chunks if not var_chunks: @@ -532,6 +540,7 @@ def extract_zarr_variable_encoding( var_chunks=variable.chunks, ndim=variable.ndim, name=name, + shape=variable.shape, ) if _zarr_v3() and chunks is None: chunks = "auto" diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index e42bfc2cd9f..37b1db791a8 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2902,6 +2902,13 @@ def test_chunk_encoding(self) -> None: with self.roundtrip(data) as actual: pass + def test_chunk_encoding_full_dimension_sentinel(self) -> None: + data = create_test_data() + data["var2"].encoding.update({"chunks": (5, -1)}) + + with self.roundtrip(data) as actual: + assert actual["var2"].encoding["chunks"] == (5, data["var2"].shape[1]) + def test_shard_encoding(self) -> None: # These datasets have no dask chunks. All chunking/sharding specified in # encoding From dad7145f3eafa1ad5d17f51ea8ef4d70dd0b4c2d Mon Sep 17 00:00:00 2001 From: Asish Kumar Date: Sun, 12 Apr 2026 05:21:08 +0000 Subject: [PATCH 2/2] test: assert zarr -1 chunk encoding is preserved --- xarray/tests/test_backends.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 37b1db791a8..a898214b09a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2908,6 +2908,7 @@ def test_chunk_encoding_full_dimension_sentinel(self) -> None: with self.roundtrip(data) as actual: assert actual["var2"].encoding["chunks"] == (5, data["var2"].shape[1]) + assert data["var2"].encoding["chunks"] == (5, -1) def test_shard_encoding(self) -> None: # These datasets have no dask chunks. All chunking/sharding specified in