zarr-developers · d-v-b · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 8, 2026
diff --git a/.gitignore b/.gitignore
@@ -92,5 +92,9 @@ tests/.hypothesis
 zarr/version.py
 zarr.egg-info/
 
+# Local agent / planning notes (not versioned)
+.claude/
+CLAUDE.md
+docs/superpowers/
 # zarr-metadata package lockfile (a library, not an app)
 packages/zarr-metadata/uv.lock
diff --git a/changes/PLACEHOLDER-fused-default.feature.md b/changes/PLACEHOLDER-fused-default.feature.md
@@ -0,0 +1 @@
+`FusedCodecPipeline` is now the default codec pipeline. It runs codec compute synchronously and in bulk (avoiding the per-chunk async scheduling overhead of `BatchedCodecPipeline`), giving large speedups for sharded arrays (up to ~24x writes / ~14x reads on many-chunks-per-shard layouts, more with compression) and no regressions on compute-bound workloads. The previous behavior is available by setting `zarr.config.set({"codec_pipeline.path": "zarr.core.codec_pipeline.BatchedCodecPipeline"})`.
diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py
@@ -694,6 +694,73 @@ async def get_ranges(
         ):
             yield group
 
+    def get_ranges_sync(
+        self,
+        key: str,
+        byte_ranges: Sequence[ByteRequest | None],
+        *,
+        prototype: BufferPrototype,
+        max_gap_bytes: int = 1 << 20,  # 1 MiB
+        max_coalesced_bytes: int = 16 << 20,  # 16 MiB
+    ) -> Sequence[tuple[int, Buffer | None]]:
+        """Synchronous, coalescing counterpart of `get_ranges`.
+
+        Plans merged fetches with the same `coalesce_ranges` policy as the async
+        path, then issues one synchronous `get_sync` per merged group (or per
+        uncoalescable request) and slices results back into per-input buffers.
+        Used by the sync codec pipeline's partial-shard reads so they get the
+        same byte-range coalescing as the async path, without an event loop.
+
+        Returns a list of `(input_index, Buffer | None)`. Raises
+        `BaseExceptionGroup` containing a `FileNotFoundError` if the key is
+        absent (matching `get_ranges`), so callers can handle a deleted shard
+        uniformly across the sync and async paths.
+
+        Requires the store to implement `get_sync` (`SupportsGetSync`).
+        """
+        from zarr.core._coalesce import coalesce_ranges
+
+        if not isinstance(self, SupportsGetSync):
+            raise TypeError(f"{type(self).__name__} does not support synchronous reads")
+
+        groups, uncoalescable = coalesce_ranges(
+            byte_ranges, max_gap_bytes=max_gap_bytes, max_coalesced_bytes=max_coalesced_bytes
+        )
+        results: list[tuple[int, Buffer | None]] = []
+        errors: list[BaseException] = []
+
+        def _get(req: ByteRequest | None) -> Buffer | None:
+            return self.get_sync(key, prototype=prototype, byte_range=req)
+
+        for idx, req in uncoalescable:
+            buf = _get(req)
+            if buf is None:
+                errors.append(FileNotFoundError(key))
+            else:
+                results.append((idx, buf))
+
+        for members in groups:
+            if len(members) == 1:
+                solo_idx, solo_req = members[0]
+                buf = _get(solo_req)
+                if buf is None:
+                    errors.append(FileNotFoundError(key))
+                else:
+                    results.append((solo_idx, buf))
+                continue
+            start = members[0][1].start
+            end = max(r.end for _, r in members)
+            big = _get(RangeByteRequest(start, end))
+            if big is None:
+                errors.append(FileNotFoundError(key))
+                continue
+            for member_idx, r in members:
+                results.append((member_idx, big[r.start - start : r.end - start]))
+
+        if errors:
+            raise BaseExceptionGroup("chunk read failed", errors)
+        return results
+
     async def getsize(self, key: str) -> int:
         """
         Return the size, in bytes, of a value in a Store.

diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py
@@ -23,22 +23,22 @@ class V2Codec(ArrayBytesCodec):
 
     is_fixed_size = False
 
-    async def _decode_single(
+    def _decode_sync(
         self,
         chunk_bytes: Buffer,
         chunk_spec: ArraySpec,
     ) -> NDBuffer:
         cdata = chunk_bytes.as_array_like()
         # decompress
         if self.compressor:
-            chunk = await asyncio.to_thread(self.compressor.decode, cdata)
+            chunk = self.compressor.decode(cdata)
         else:
             chunk = cdata
 
         # apply filters
         if self.filters:
             for f in reversed(self.filters):
-                chunk = await asyncio.to_thread(f.decode, chunk)
+                chunk = f.decode(chunk)
 
         # view as numpy array with correct dtype
         chunk = ensure_ndarray_like(chunk)
@@ -70,7 +70,7 @@ async def _decode_single(
 
         return get_ndbuffer_class().from_ndarray_like(chunk)
 
-    async def _encode_single(
+    def _encode_sync(
         self,
         chunk_array: NDBuffer,
         chunk_spec: ArraySpec,
@@ -83,18 +83,32 @@ async def _encode_single(
         # apply filters
         if self.filters:
             for f in self.filters:
-                chunk = await asyncio.to_thread(f.encode, chunk)
+                chunk = f.encode(chunk)
         # check object encoding
         if ensure_ndarray_like(chunk).dtype == object:
             raise RuntimeError("cannot write object array without object codec")
 
         # compress
         if self.compressor:
-            cdata = await asyncio.to_thread(self.compressor.encode, chunk)
+            cdata = self.compressor.encode(chunk)
         else:
             cdata = chunk
         cdata = ensure_bytes(cdata)
         return chunk_spec.prototype.buffer.from_bytes(cdata)
 
+    async def _decode_single(
+        self,
+        chunk_bytes: Buffer,
+        chunk_spec: ArraySpec,
+    ) -> NDBuffer:
+        return await asyncio.to_thread(self._decode_sync, chunk_bytes, chunk_spec)
+
+    async def _encode_single(
+        self,
+        chunk_array: NDBuffer,
+        chunk_spec: ArraySpec,
+    ) -> Buffer | None:
+        return await asyncio.to_thread(self._encode_sync, chunk_array, chunk_spec)
+
     def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
         raise NotImplementedError
diff --git a/src/zarr/codecs/numcodecs/_codecs.py b/src/zarr/codecs/numcodecs/_codecs.py
@@ -47,7 +47,7 @@
 if TYPE_CHECKING:
     from zarr.abc.numcodec import Numcodec
     from zarr.core.array_spec import ArraySpec
-    from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer
+    from zarr.core.buffer import Buffer, NDBuffer
 
 CODEC_PREFIX = "numcodecs."
 
@@ -134,53 +134,63 @@ class _NumcodecsBytesBytesCodec(_NumcodecsCodec, BytesBytesCodec):
     def __init__(self, **codec_config: JSON) -> None:
         super().__init__(**codec_config)
 
-    async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer:
-        return await asyncio.to_thread(
-            as_numpy_array_wrapper,
-            self._codec.decode,
-            chunk_data,
-            chunk_spec.prototype,
-        )
+    def _decode_sync(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer:
+        return as_numpy_array_wrapper(self._codec.decode, chunk_data, chunk_spec.prototype)
 
-    def _encode(self, chunk_data: Buffer, prototype: BufferPrototype) -> Buffer:
+    def _encode_sync(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer:
         encoded = self._codec.encode(chunk_data.as_array_like())
         if isinstance(encoded, np.ndarray):  # Required for checksum codecs
-            return prototype.buffer.from_bytes(encoded.tobytes())
-        return prototype.buffer.from_bytes(encoded)
+            return chunk_spec.prototype.buffer.from_bytes(encoded.tobytes())
+        return chunk_spec.prototype.buffer.from_bytes(encoded)
+
+    async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer:
+        return await asyncio.to_thread(self._decode_sync, chunk_data, chunk_spec)
 
     async def _encode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer:
-        return await asyncio.to_thread(self._encode, chunk_data, chunk_spec.prototype)
+        return await asyncio.to_thread(self._encode_sync, chunk_data, chunk_spec)
 
 
 class _NumcodecsArrayArrayCodec(_NumcodecsCodec, ArrayArrayCodec):
     def __init__(self, **codec_config: JSON) -> None:
         super().__init__(**codec_config)
 
-    async def _decode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:
+    def _decode_sync(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:
         chunk_ndarray = chunk_data.as_ndarray_like()
-        out = await asyncio.to_thread(self._codec.decode, chunk_ndarray)
+        out = self._codec.decode(chunk_ndarray)
         return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape))
 
-    async def _encode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:
+    def _encode_sync(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:
         chunk_ndarray = chunk_data.as_ndarray_like()
-        out = await asyncio.to_thread(self._codec.encode, chunk_ndarray)
+        out = self._codec.encode(chunk_ndarray)
         return chunk_spec.prototype.nd_buffer.from_ndarray_like(out)
 
+    async def _decode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:
+        return await asyncio.to_thread(self._decode_sync, chunk_data, chunk_spec)
+
+    async def _encode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:
+        return await asyncio.to_thread(self._encode_sync, chunk_data, chunk_spec)
+
 
 class _NumcodecsArrayBytesCodec(_NumcodecsCodec, ArrayBytesCodec):
     def __init__(self, **codec_config: JSON) -> None:
         super().__init__(**codec_config)
 
-    async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> NDBuffer:
+    def _decode_sync(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> NDBuffer:
         chunk_bytes = chunk_data.to_bytes()
-        out = await asyncio.to_thread(self._codec.decode, chunk_bytes)
+        out = self._codec.decode(chunk_bytes)
         return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape))
 
-    async def _encode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> Buffer:
+    def _encode_sync(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> Buffer:
         chunk_ndarray = chunk_data.as_ndarray_like()
-        out = await asyncio.to_thread(self._codec.encode, chunk_ndarray)
+        out = self._codec.encode(chunk_ndarray)
         return chunk_spec.prototype.buffer.from_bytes(out)
 
+    async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> NDBuffer:
+        return await asyncio.to_thread(self._decode_sync, chunk_data, chunk_spec)
+
+    async def _encode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> Buffer:
+        return await asyncio.to_thread(self._encode_sync, chunk_data, chunk_spec)
+
 
 # bytes-to-bytes codecs
 class Blosc(_NumcodecsBytesBytesCodec, codec_name="blosc"):
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		`FusedCodecPipeline` is now the default codec pipeline. It runs codec compute synchronously and in bulk (avoiding the per-chunk async scheduling overhead of `BatchedCodecPipeline`), giving large speedups for sharded arrays (up to ~24x writes / ~14x reads on many-chunks-per-shard layouts, more with compression) and no regressions on compute-bound workloads. The previous behavior is available by setting `zarr.config.set({"codec_pipeline.path": "zarr.core.codec_pipeline.BatchedCodecPipeline"})`.