From da5b7c4e394fb0fc6d7a933b3a5ead4e532f3d48 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 21 May 2026 07:23:55 -0700 Subject: [PATCH 1/3] Refactor GeoTIFF Phase 5b: extract _decode.py from _reader.py (#2246) Mechanical extraction of strip/tile decode orchestration out of _reader.py into a new _decode.py module. Behavior-neutral. Moves the transport-independent decode helpers: - _apply_predictor, _packed_byte_count, _int_nodata_in_range, _resolve_masked_fill, _decode_strip_or_tile - _read_strips, _read_tiles (the local strip/tile readers) - _apply_orientation, _apply_orientation_with_geo, _apply_photometric_miniswhite, _miniswhite_inverted_nodata - _NATIVE_ORDER, _PARALLEL_DECODE_PIXEL_THRESHOLD Left in _reader.py: top-level entry points (_read_to_array, read_to_array), the COG-HTTP fetch+decode paths, the pixel-safety guards (MAX_PIXELS_DEFAULT, _check_dimensions, _check_source_dimensions, PixelSafetyLimitError), and the sparse- layout helpers (_sparse_fill_value, _has_sparse, _compute_full_image_byte_budget, _ifd_required_extent) which move with _layout.py in PR-H (#2247). _reader.py drops from 2201 to 1407 lines; the moved code lands in _decode.py at 897 lines. The full public/internal import surface from xrspatial.geotiff._reader is preserved via a back-import block, matching the pattern PR-E used for _sources. Two strip-decode tests that monkey-patched ``_reader_mod.ThreadPoolExecutor`` now patch ``concurrent.futures.ThreadPoolExecutor`` instead, tracking the decode functions to their new home. The dispatch contract under test (parallel branch engages on multi-strip, sparse-only short- circuits) is unchanged. Part of #2211. --- xrspatial/geotiff/_decode.py | 897 ++++++++++++++++++ xrspatial/geotiff/_reader.py | 847 +---------------- .../tests/test_parallel_strip_decode_2100.py | 15 +- .../test_parallel_strip_decode_sparse_2100.py | 12 +- 4 files changed, 942 insertions(+), 829 deletions(-) create mode 100644 xrspatial/geotiff/_decode.py diff --git a/xrspatial/geotiff/_decode.py b/xrspatial/geotiff/_decode.py new file mode 100644 index 000000000..87e3f2ada --- /dev/null +++ b/xrspatial/geotiff/_decode.py @@ -0,0 +1,897 @@ +"""Strip/tile decode orchestration for TIFF/COG reads. + +This module holds the transport-independent decode helpers extracted +from :mod:`xrspatial.geotiff._reader` in PR-G of the GeoTIFF refactor +epic (issue #2211, sub-issue #2246). The functions here take an already +materialised byte slice plus an IFD and a header, and produce a numpy +array. Everything that knows how to *fetch* those bytes (local mmap, +HTTP, fsspec) stays in :mod:`xrspatial.geotiff._sources`; everything +that orchestrates the top-level read flow (windowing, overview +selection, DataArray packaging) stays in :mod:`xrspatial.geotiff._reader`. + +Public import surface from :mod:`xrspatial.geotiff._reader` is preserved +by re-importing the names defined here back at the call sites that the +test suite and downstream backends depend on. See the import block at +the top of ``_reader.py``. +""" +from __future__ import annotations + +import math +import os as _os_module +import sys as _sys + +import numpy as np + +from ._compression import ( + COMPRESSION_LERC, + decompress, + fp_predictor_decode, + lerc_decompress_with_mask, + predictor_decode, + unpack_bits, +) +from ._dtypes import SUB_BYTE_BPS, resolve_bits_per_sample +from ._geotags import ( + GeoInfo, + GeoTransform, + RASTER_PIXEL_IS_POINT, +) +from ._header import ( + IFD, + TIFFHeader, + validate_tile_layout, +) +from ._sources import _max_tile_bytes_from_env +from ._validation import _validate_predictor_sample_format + +_NATIVE_ORDER = '<' if _sys.byteorder == 'little' else '>' + +#: Sentinel used as the default value of ``max_pixels`` so the actual +#: ``MAX_PIXELS_DEFAULT`` can stay in :mod:`._reader` without making this +#: module import-time dependent on it. ``_resolve_max_pixels`` does the +#: lazy lookup at call time. +_MAX_PIXELS_UNSET = object() + + +def _resolve_max_pixels(value): + """Return ``MAX_PIXELS_DEFAULT`` when *value* is the unset sentinel.""" + if value is _MAX_PIXELS_UNSET: + from ._reader import MAX_PIXELS_DEFAULT + return MAX_PIXELS_DEFAULT + return value + +#: Per-tile pixel count at and above which the local and HTTP tile-read paths +#: spread codec decode across a ``ThreadPoolExecutor``. Below this, pool +#: startup costs outweigh the parallelism win (issue #1551). Bound is inclusive +#: so the default ``tile_size=256`` (256*256 == 64*1024) lands on the parallel +#: path. Used by both ``_read_tiles`` and ``_fetch_decode_cog_http_tiles``. +_PARALLEL_DECODE_PIXEL_THRESHOLD = 64 * 1024 + + +def _apply_predictor(chunk: np.ndarray, pred: int, width: int, + height: int, bytes_per_sample: int, + samples: int = 1, + byte_order: str = '<') -> np.ndarray: + """Apply the appropriate predictor decode to decompressed data. + + ``width``, ``height``, ``bytes_per_sample``, and ``samples`` describe + the raw pixel layout before predictor inversion: ``width * samples`` + samples per row, each ``bytes_per_sample`` bytes wide. + + Predictor=2 (horizontal differencing) operates at the *sample* level + per TIFF Technical Note (libtiff/GDAL convention): the difference is + taken between adjacent same-component samples in the sample's + natural bit width, with stride equal to ``samples`` samples. A + byte-wise implementation drops the inter-byte carry for multi-byte + samples and produces wrong values. + + Predictor=3 (floating-point) byte-swizzles each row into + ``bytes_per_sample`` interleaved lanes of length ``width * samples``, + per TIFF Technical Note 3. The un-transpose stage has to put the + MSB lane at the file's high-order byte position, which differs for + big- vs little-endian files; ``byte_order`` carries that. + """ + if pred == 2: + return predictor_decode(chunk, width, height, + bytes_per_sample, samples=samples, + byte_order=byte_order) + elif pred == 3: + return fp_predictor_decode(chunk, width * samples, height, + bytes_per_sample, + big_endian=(byte_order == '>')) + return chunk + + +def _packed_byte_count(pixel_count: int, bps: int) -> int: + """Compute the number of packed bytes for sub-byte bit depths.""" + return (pixel_count * bps + 7) // 8 + + +def _int_nodata_in_range(nodata_int: int, dtype: np.dtype) -> bool: + """Return True iff *nodata_int* is representable as *dtype*. + + Used to gate ``dtype.type(int(...))`` casts that would otherwise raise + ``OverflowError`` on real-world files that pair an unsigned dtype with + a negative GDAL_NODATA sentinel (e.g. uint16 + ``-9999``). When the + sentinel cannot be represented, the file's pixels can never match it, + so the caller should treat the sentinel as a no-op for value matching + (still surfacing it via ``attrs['nodata']`` so write round-trips + preserve the original tag). + """ + if dtype.kind not in ('u', 'i'): + return False + info = np.iinfo(dtype) + return info.min <= nodata_int <= info.max + + +def _resolve_masked_fill(nodata_str: str | None, dtype: np.dtype): + """Resolve the value to use when restoring LERC-masked pixels. + + Mirrors :func:`_sparse_fill_value` but defaults to NaN for floating + dtypes when the file does not declare a nodata sentinel. Float + rasters with no GDAL_NODATA tag still benefit from NaN propagation + because LERC's zero fill would silently masquerade as a real + measurement at z == 0. + + Note: integer dtypes with no GDAL_NODATA tag fall back to ``0``, + which is the same value LERC zero-fills masked pixels with -- in + that case the mask application is intentionally a no-op. We avoid + inventing an integer sentinel (e.g. iinfo.max) because doing so + would silently change pixel values for files that never declared + one, breaking downstream consumers that key off the original data. + + Out-of-range integer sentinels (e.g. ``uint16`` paired with + ``GDAL_NODATA="-9999"``, common on legacy GDAL files) cannot be + represented in the file dtype and so cannot match any decoded + pixel; we fall back to ``0`` rather than raising ``OverflowError`` + on the dtype cast. + """ + if nodata_str is not None: + # Try ``int`` first so 64-bit sentinels survive without the + # float64 round-trip; fall back to ``float`` for NaN / Inf / + # scientific notation / fractional values. See issue #1847. + from ._geotags import _parse_nodata_str as _parse_nd + parsed = _parse_nd(nodata_str) + if parsed is not None: + if dtype.kind == 'f': + return dtype.type(parsed) + if isinstance(parsed, int): + if _int_nodata_in_range(parsed, dtype): + return dtype.type(parsed) + elif not math.isnan(parsed) and not math.isinf(parsed): + if float(parsed).is_integer(): + nodata_int = int(parsed) + if _int_nodata_in_range(nodata_int, dtype): + return dtype.type(nodata_int) + if dtype.kind == 'f': + return dtype.type(np.nan) + return dtype.type(0) + + +def _decode_strip_or_tile(data_slice, compression, width, height, samples, + bps, bytes_per_sample, is_sub_byte, dtype, pred, + byte_order='<', jpeg_tables=None, + masked_fill=None): + """Decompress, apply predictor, unpack sub-byte, and reshape a strip/tile. + + Parameters + ---------- + byte_order : str + '<' for little-endian, '>' for big-endian. When the file byte + order differs from the system's native order, pixel data is + byte-swapped after decompression. + jpeg_tables : bytes or None + Raw bytes of the file's JPEGTables tag (347), or None if the file + doesn't have one. GDAL-style tiled JPEG TIFFs store DQT/DHT tables + once in this tag and each tile is a JPEG fragment that depends on + them; the JPEG decoder splices the tables in before handing the + tile to libjpeg. Ignored for non-JPEG compressions. + masked_fill : scalar or None + Fill value written into pixels that the LERC valid-mask flags as + invalid. Only consulted for ``compression == COMPRESSION_LERC`` + when the decoder returns a non-trivial mask; ignored for every + other codec. Callers should compute it once per IFD via + :func:`_resolve_masked_fill` (typically NaN for float dtypes or + the parsed ``GDAL_NODATA`` sentinel). When ``None``, masked + pixels are left at LERC's zero fill. + + Returns an array shaped (height, width) or (height, width, samples). + """ + pixel_count = width * height * samples + if is_sub_byte: + expected = _packed_byte_count(pixel_count, bps) + else: + expected = pixel_count * bytes_per_sample + + lerc_mask = None + if compression == COMPRESSION_LERC: + # LERC needs special handling: lerc.decode also returns a + # valid-mask which the generic decompress() dispatcher discards. + # We capture it here so masked pixels can be restored to nodata + # below, instead of leaking LERC's zero fill into the output. + # Forward ``expected`` so the wrapper rejects bombs at the + # blob-header level rather than after the full buffer is + # materialised (issue #1625). + decoded_bytes, lerc_mask = lerc_decompress_with_mask( + data_slice, expected_size=expected) + chunk = np.frombuffer(decoded_bytes, dtype=np.uint8) + else: + chunk = decompress(data_slice, compression, expected, + width=width, height=height, samples=samples, + jpeg_tables=jpeg_tables) + + # Validate the decompressed byte count. A truncated deflate stream or a + # buggy compressor can produce fewer or more bytes than expected. Without + # this check the downstream reshape raises an opaque "cannot reshape array + # of size N into shape (h, w)" that hides which tile/strip broke. Edge + # tiles in a valid TIFF still decompress to the full tile_height x + # tile_width (the caller slices the top-left region), so this only fires + # on genuine corruption. + if chunk.size != expected: + raise ValueError( + f"Decompressed tile/strip size mismatch: expected {expected} " + f"bytes for a {width} x {height} x {samples} block " + f"(bps={bps}, compression={compression}), got {chunk.size}. " + f"The TIFF data is likely truncated or corrupt." + ) + + if pred in (2, 3) and not is_sub_byte: + if not chunk.flags.writeable: + chunk = chunk.copy() + chunk = _apply_predictor(chunk, pred, width, height, + bytes_per_sample, samples=samples, + byte_order=byte_order) + + if is_sub_byte: + pixels = unpack_bits(chunk, bps, pixel_count) + else: + # Use the file's byte order for the view, then convert to native. + # The view dtype must match the on-disk sample width: float16 + # files (bps=16 + SampleFormat=3) are auto-promoted to float32 + # for the user-visible array, but the raw bytes have to be + # viewed as float16 first then cast (#1941). Detect the + # promotion via the bps-vs-dtype.itemsize mismatch so the + # surrounding pipeline stays unchanged for byte-equal cases. + if dtype.itemsize * 8 != bps and bps == 16 and dtype.kind == 'f': + storage_dtype = np.dtype('float16').newbyteorder(byte_order) + pixels = chunk.view(storage_dtype).astype(dtype) + else: + file_dtype = dtype.newbyteorder(byte_order) + pixels = chunk.view(file_dtype) + if file_dtype.byteorder not in ('=', '|', _NATIVE_ORDER): + pixels = pixels.astype(dtype) + + if samples > 1: + out = pixels.reshape(height, width, samples) + else: + out = pixels.reshape(height, width) + + # Restore nodata in positions LERC flagged as invalid. LERC + # zero-fills masked pixels in the data array, which would otherwise + # be indistinguishable from real zero readings downstream. + if lerc_mask is not None and masked_fill is not None: + mask_arr = np.asarray(lerc_mask) + if mask_arr.ndim == 2 and out.ndim == 3: + mask_arr = mask_arr[..., None] + invalid = np.broadcast_to(mask_arr == 0, out.shape) + if invalid.any(): + if not out.flags.writeable: + out = out.copy() + np.putmask(out, invalid, masked_fill) + return out + + +# --------------------------------------------------------------------------- +# Strip reader +# --------------------------------------------------------------------------- + +def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader, + dtype: np.dtype, window=None, + max_pixels=_MAX_PIXELS_UNSET) -> np.ndarray: + """Read a strip-organized TIFF image. + + Parameters + ---------- + data : bytes + Full file data. + ifd : IFD + Parsed IFD for this image. + header : TIFFHeader + File header. + dtype : np.dtype + Output pixel dtype. + window : tuple or None + (row_start, col_start, row_stop, col_stop) or None for full image. + max_pixels : int + Maximum allowed pixel count (width * height * samples). + + Returns + ------- + np.ndarray with shape (height, width) or windowed subset. + """ + # Imported lazily so this module does not have an import-time + # dependency on ``_reader``. The pixel-safety guards + # (``_check_dimensions``/``_check_source_dimensions``) and the + # sparse-layout helpers (``_sparse_fill_value``/``_has_sparse``) + # both stay in ``_reader.py`` until PR-H (issue #2247); rebinding + # them here keeps PR-G mechanical. + from ._reader import ( + _check_dimensions, + _check_source_dimensions, + _has_sparse, + _sparse_fill_value, + ) + max_pixels = _resolve_max_pixels(max_pixels) + width = ifd.width + height = ifd.height + samples = ifd.samples_per_pixel + # Source-IFD dim check (issue #2053). The tiled path is already + # covered by ``validate_tile_layout``; this is its stripped-path + # parity. Run before any window clamping so a malformed + # ``ImageWidth=0`` IFD fails at the source rather than collapsing + # to an empty post-clamp window. + _check_source_dimensions(width, height, samples) + compression = ifd.compression + rps = ifd.rows_per_strip + offsets = ifd.strip_offsets + byte_counts = ifd.strip_byte_counts + pred = ifd.predictor + _validate_predictor_sample_format(pred, ifd.sample_format) + bps = resolve_bits_per_sample(ifd.bits_per_sample) + bytes_per_sample = bps // 8 + is_sub_byte = bps in SUB_BYTE_BPS + jpeg_tables = ifd.jpeg_tables + masked_fill = (_resolve_masked_fill(ifd.nodata_str, dtype) + if compression == COMPRESSION_LERC else None) + + if offsets is None or byte_counts is None: + raise ValueError("Missing strip offsets or byte counts") + + # Per-strip compressed-byte cap (issue #1664). Mirrors the HTTP path: + # a crafted ``StripByteCounts`` can declare a huge value and even + # though mmap slicing on the local path is bounded by the file size, + # the slice is still passed into the decompressor which can expand + # a few KiB of crafted deflate/zstd into gigabytes of decoded output. + # Override via ``XRSPATIAL_COG_MAX_TILE_BYTES`` (the env var is shared + # with the tile path because the budget is the same). + max_tile_bytes = _max_tile_bytes_from_env() + for _strip_idx, _bc in enumerate(byte_counts): + if _bc > max_tile_bytes: + raise ValueError( + f"TIFF strip {_strip_idx} declares " + f"StripByteCount={_bc:,} bytes, which exceeds the " + f"per-strip safety cap of {max_tile_bytes:,} bytes. " + f"The file is malformed or attempting denial-of-service. " + f"Override via XRSPATIAL_COG_MAX_TILE_BYTES if this file " + f"is legitimate." + ) + + # A corrupt header can report RowsPerStrip=0, which would divide by zero + # below. Reject it as a typed parse error rather than letting the + # ZeroDivisionError leak out to the caller. + if rps is None or rps <= 0: + raise ValueError(f"Invalid RowsPerStrip: {rps!r}") + + planar = ifd.planar_config # 1=chunky (interleaved), 2=planar (separate) + + # Determine output region + if window is not None: + r0, c0, r1, c1 = window + r0 = max(0, r0) + c0 = max(0, c0) + r1 = min(height, r1) + c1 = min(width, c1) + else: + r0, c0, r1, c1 = 0, 0, height, width + + out_h = r1 - r0 + out_w = c1 - c0 + + _check_dimensions(out_w, out_h, samples, max_pixels) + + # StripByteCounts must have at least one entry per strip; a corrupt count + # field can shrink it. Detect the mismatch after the dimension safety + # check so an oversized header raises the safety-limit error first, then + # raise a typed ValueError here instead of IndexError when the loop + # indexes past the end. + # + # For PlanarConfiguration=2 (separate / planar) each sample plane has its + # own run of strips, so the table must hold strips_per_band * samples + # entries. PlanarConfiguration=1 (chunky) interleaves samples within a + # single run of strips_per_band entries. + strips_per_band = (height + rps - 1) // rps + if planar == 2 and samples > 1: + n_strips_expected = strips_per_band * samples + if len(offsets) < n_strips_expected or len(byte_counts) < n_strips_expected: + raise ValueError( + f"Strip table truncated for planar layout " + f"(PlanarConfiguration=2): expected " + f"{n_strips_expected} entries " + f"({strips_per_band} strips x {samples} samples), got " + f"offsets={len(offsets)}, byte_counts={len(byte_counts)}") + else: + n_strips_expected = strips_per_band + if len(offsets) < n_strips_expected or len(byte_counts) < n_strips_expected: + raise ValueError( + f"Strip table truncated: expected {n_strips_expected} entries, " + f"got offsets={len(offsets)}, byte_counts={len(byte_counts)}") + + # Sparse strips (StripByteCounts == 0) must materialise as nodata or 0 + # rather than be decoded. Pre-fill the result so any skipped strips + # land on a known fill value. + sparse = _has_sparse(byte_counts) + if sparse: + fill = _sparse_fill_value(ifd, dtype) + if samples > 1: + result = np.full((out_h, out_w, samples), fill, dtype=dtype) + else: + result = np.full((out_h, out_w), fill, dtype=dtype) + elif samples > 1: + result = np.empty((out_h, out_w, samples), dtype=dtype) + else: + result = np.empty((out_h, out_w), dtype=dtype) + + # Collect strip jobs; decode in parallel when the pool overhead pays off. + # Mirrors the tile path's gate at ``_PARALLEL_DECODE_PIXEL_THRESHOLD``: + # codec decode (deflate, zstd, LZW) releases the GIL inside the C + # extension so threads actually overlap codec work across cores. The + # placement loop that copies pixels into ``result`` stays serial to + # avoid contending writes to the output buffer. See issue #2100. + strip_jobs: list[tuple[int, int, int]] = [] # (band_idx, strip_idx, global_idx) + if planar == 2 and samples > 1: + first_strip = r0 // rps + last_strip = min((r1 - 1) // rps, strips_per_band - 1) + for band_idx in range(samples): + band_offset = band_idx * strips_per_band + for strip_idx in range(first_strip, last_strip + 1): + global_idx = band_offset + strip_idx + if byte_counts[global_idx] == 0: + continue + strip_row = strip_idx * rps + strip_rows = min(rps, height - strip_row) + if strip_rows <= 0: + continue + strip_jobs.append((band_idx, strip_idx, global_idx)) + strip_samples = 1 + else: + first_strip = r0 // rps + last_strip = min((r1 - 1) // rps, len(offsets) - 1) + for strip_idx in range(first_strip, last_strip + 1): + strip_row = strip_idx * rps + strip_rows = min(rps, height - strip_row) + if strip_rows <= 0: + continue + if byte_counts[strip_idx] == 0: + continue + strip_jobs.append((0, strip_idx, strip_idx)) + strip_samples = samples + + def _decode_strip_job(job): + _band_idx, strip_idx, global_idx = job + strip_row = strip_idx * rps + strip_rows = min(rps, height - strip_row) + strip_data = data[ + offsets[global_idx]:offsets[global_idx] + byte_counts[global_idx]] + return _decode_strip_or_tile( + strip_data, compression, width, strip_rows, strip_samples, + bps, bytes_per_sample, is_sub_byte, dtype, pred, + byte_order=header.byte_order, + jpeg_tables=jpeg_tables, + masked_fill=masked_fill) + + n_strips = len(strip_jobs) + strip_pixel_count = width * rps + use_parallel = (n_strips > 1 + and strip_pixel_count >= _PARALLEL_DECODE_PIXEL_THRESHOLD) + if use_parallel: + # Function-local import (rather than the module-level binding) + # so tests that monkey-patch ``concurrent.futures.ThreadPoolExecutor`` + # see the spy class here. The tile path uses the same pattern. + from concurrent.futures import ThreadPoolExecutor + n_workers = min(n_strips, _os_module.cpu_count() or 4) + with ThreadPoolExecutor(max_workers=n_workers) as pool: + decoded_strips = list(pool.map(_decode_strip_job, strip_jobs)) + else: + decoded_strips = [_decode_strip_job(job) for job in strip_jobs] + + for (band_idx, strip_idx, _global_idx), strip_pixels in zip( + strip_jobs, decoded_strips): + strip_row = strip_idx * rps + strip_rows = min(rps, height - strip_row) + src_r0 = max(r0 - strip_row, 0) + src_r1 = min(r1 - strip_row, strip_rows) + dst_r0 = max(strip_row - r0, 0) + dst_r1 = dst_r0 + (src_r1 - src_r0) + if dst_r1 > dst_r0: + if planar == 2 and samples > 1: + result[dst_r0:dst_r1, :, band_idx] = ( + strip_pixels[src_r0:src_r1, c0:c1]) + else: + result[dst_r0:dst_r1] = strip_pixels[src_r0:src_r1, c0:c1] + + return result + + +# --------------------------------------------------------------------------- +# Tile reader +# --------------------------------------------------------------------------- + +def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader, + dtype: np.dtype, window=None, + max_pixels=_MAX_PIXELS_UNSET) -> np.ndarray: + """Read a tile-organized TIFF image. + + Parameters + ---------- + data : bytes + Full file data. + ifd : IFD + Parsed IFD for this image. + header : TIFFHeader + File header. + dtype : np.dtype + Output pixel dtype. + window : tuple or None + (row_start, col_start, row_stop, col_stop) or None for full image. + max_pixels : int + Maximum allowed pixel count (width * height * samples). + + Returns + ------- + np.ndarray with shape (height, width) or windowed subset. + """ + from ._reader import ( + MAX_PIXELS_DEFAULT, + _check_dimensions, + _has_sparse, + _sparse_fill_value, + ) + max_pixels = _resolve_max_pixels(max_pixels) + width = ifd.width + height = ifd.height + tw = ifd.tile_width + th = ifd.tile_height + samples = ifd.samples_per_pixel + compression = ifd.compression + pred = ifd.predictor + _validate_predictor_sample_format(pred, ifd.sample_format) + bps = resolve_bits_per_sample(ifd.bits_per_sample) + bytes_per_sample = bps // 8 + is_sub_byte = bps in SUB_BYTE_BPS + jpeg_tables = ifd.jpeg_tables + masked_fill = (_resolve_masked_fill(ifd.nodata_str, dtype) + if compression == COMPRESSION_LERC else None) + + offsets = ifd.tile_offsets + byte_counts = ifd.tile_byte_counts + if offsets is None or byte_counts is None: + raise ValueError("Missing tile offsets or byte counts") + + if tw <= 0 or th <= 0: + raise ValueError( + f"Invalid tile dimensions: TileWidth={tw}, TileLength={th}") + + # Reject crafted tile dims (e.g. TileWidth = 2**31). This guards the + # TIFF header against malformed values; it is not the caller's output + # budget. The output-window check below uses ``max_pixels`` and is + # what enforces the user's per-call memory cap. The source-read path + # under ``read_vrt`` (#1796) relies on that output check to honour a + # small caller ``max_pixels`` against a normal-tile source; see + # #1823. + _check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT) + + # Per-tile compressed-byte cap (issue #1664). Same env var as the + # HTTP path. mmap slicing is bounded by the file size, but the slice + # gets handed to the decompressor, and a small slice can balloon + # into gigabytes through deflate / zstd / lzw / lerc. + max_tile_bytes = _max_tile_bytes_from_env() + for _tile_idx, _bc in enumerate(byte_counts): + if _bc > max_tile_bytes: + raise ValueError( + f"TIFF tile {_tile_idx} declares " + f"TileByteCount={_bc:,} bytes, which exceeds the " + f"per-tile safety cap of {max_tile_bytes:,} bytes. " + f"The file is malformed or attempting denial-of-service. " + f"Override via XRSPATIAL_COG_MAX_TILE_BYTES if this file " + f"is legitimate." + ) + + planar = ifd.planar_config + tiles_across = math.ceil(width / tw) + tiles_down = math.ceil(height / th) + + if window is not None: + r0, c0, r1, c1 = window + r0 = max(0, r0) + c0 = max(0, c0) + r1 = min(height, r1) + c1 = min(width, c1) + else: + r0, c0, r1, c1 = 0, 0, height, width + + out_h = r1 - r0 + out_w = c1 - c0 + + _check_dimensions(out_w, out_h, samples, max_pixels) + + # Reject malformed TIFFs whose declared tile grid exceeds the number of + # supplied TileOffsets entries. Silent skipping in the CPU loop below + # would mask the problem, and the GPU path reads OOB. See issue #1219. + validate_tile_layout(ifd) + + # Sparse tiles (TileByteCounts == 0) must materialise as nodata or 0 + # rather than be decoded. Pre-fill the result so any skipped tiles + # land on a known fill value; otherwise sparse regions would leak + # uninitialised memory (full-image read) or stay zeroed regardless + # of the file's nodata setting (windowed read). + sparse = _has_sparse(byte_counts) + if sparse: + fill = _sparse_fill_value(ifd, dtype) + if samples > 1: + result = np.full((out_h, out_w, samples), fill, dtype=dtype) + else: + result = np.full((out_h, out_w), fill, dtype=dtype) + else: + _alloc = np.zeros if window is not None else np.empty + if samples > 1: + result = _alloc((out_h, out_w, samples), dtype=dtype) + else: + result = _alloc((out_h, out_w), dtype=dtype) + + tile_row_start = r0 // th + tile_row_end = min(math.ceil(r1 / th), tiles_down) + tile_col_start = c0 // tw + tile_col_end = min(math.ceil(c1 / tw), tiles_across) + + band_count = samples if (planar == 2 and samples > 1) else 1 + tiles_per_band = tiles_across * tiles_down + + # Build list of tiles to decode. Sparse tiles (byte_count==0) are + # skipped here -- the result is pre-filled with the sparse fill value. + tile_jobs = [] + for band_idx in range(band_count): + band_tile_offset = band_idx * tiles_per_band if band_count > 1 else 0 + tile_samples = 1 if band_count > 1 else samples + + for tr in range(tile_row_start, tile_row_end): + for tc in range(tile_col_start, tile_col_end): + tile_idx = band_tile_offset + tr * tiles_across + tc + if tile_idx >= len(offsets): + continue + if byte_counts[tile_idx] == 0: + continue + tile_jobs.append((band_idx, tr, tc, tile_idx, tile_samples)) + + # Decode tiles in parallel when the work per tile is large enough to + # outweigh the thread-pool overhead. Uncompressed multi-tile reads also + # benefit because numpy frombuffer + slice copies aren't free at large + # tile sizes. Threshold is shared with the HTTP COG path below + # (issue #1551). + n_tiles = len(tile_jobs) + tile_pixels = tw * th + use_parallel = (n_tiles > 1 and tile_pixels >= _PARALLEL_DECODE_PIXEL_THRESHOLD) + + def _decode_one(job): + band_idx, tr, tc, tile_idx, tile_samples = job + tile_data = data[offsets[tile_idx]:offsets[tile_idx] + byte_counts[tile_idx]] + return _decode_strip_or_tile( + tile_data, compression, tw, th, tile_samples, + bps, bytes_per_sample, is_sub_byte, dtype, pred, + byte_order=header.byte_order, + jpeg_tables=jpeg_tables, + masked_fill=masked_fill) + + if use_parallel: + # Function-local import (rather than the module-level binding) + # so tests that monkey-patch ``concurrent.futures.ThreadPoolExecutor`` + # see the spy class here. Issue #1551. + from concurrent.futures import ThreadPoolExecutor + n_workers = min(n_tiles, _os_module.cpu_count() or 4) + with ThreadPoolExecutor(max_workers=n_workers) as pool: + decoded = list(pool.map(_decode_one, tile_jobs)) + else: + decoded = [_decode_one(job) for job in tile_jobs] + + # Place decoded tiles into the output array + for (band_idx, tr, tc, tile_idx, tile_samples), tile_pixels in zip(tile_jobs, decoded): + tile_r0 = tr * th + tile_c0 = tc * tw + + src_r0 = max(r0 - tile_r0, 0) + src_c0 = max(c0 - tile_c0, 0) + src_r1 = min(r1 - tile_r0, th) + src_c1 = min(c1 - tile_c0, tw) + + dst_r0 = max(tile_r0 - r0, 0) + dst_c0 = max(tile_c0 - c0, 0) + + actual_tile_h = min(th, height - tile_r0) + actual_tile_w = min(tw, width - tile_c0) + src_r1 = min(src_r1, actual_tile_h) + src_c1 = min(src_c1, actual_tile_w) + dst_r1 = dst_r0 + (src_r1 - src_r0) + dst_c1 = dst_c0 + (src_c1 - src_c0) + + if dst_r1 > dst_r0 and dst_c1 > dst_c0: + src_slice = tile_pixels[src_r0:src_r1, src_c0:src_c1] + if band_count > 1: + result[dst_r0:dst_r1, dst_c0:dst_c1, band_idx] = src_slice + else: + result[dst_r0:dst_r1, dst_c0:dst_c1] = src_slice + + return result + + +# --------------------------------------------------------------------------- +# Orientation and photometric post-processing +# --------------------------------------------------------------------------- + +def _apply_orientation(arr: np.ndarray, orientation: int) -> np.ndarray: + """Reorient a decoded TIFF array according to the Orientation tag (274). + + The TIFF 6.0 spec defines eight orientations describing where the + *first row* and *first column* of the stored data sit relative to the + visual top-left of the image: + + === ================= ======================================== + 1 top-left identity (default, no transform) + 2 top-right mirror horizontally (flip columns) + 3 bottom-right rotate 180 degrees + 4 bottom-left mirror vertically (flip rows) + 5 left-top transpose (rows<->columns) + 6 right-top rotate 90 clockwise + 7 right-bottom transverse (anti-transpose) + 8 left-bottom rotate 90 counter-clockwise + === ================= ======================================== + + Values 5-8 swap rows and columns: the file's stored width becomes the + output's height and vice versa. + + The input ``arr`` is shaped ``(height, width)`` or + ``(height, width, samples)``. Multi-band 3D arrays only have their + first two axes transformed; the sample axis is preserved. + """ + if orientation == 1: + return arr + if orientation == 2: + return np.ascontiguousarray(arr[:, ::-1]) + if orientation == 3: + return np.ascontiguousarray(arr[::-1, ::-1]) + if orientation == 4: + return np.ascontiguousarray(arr[::-1, :]) + # Orientations 5-8 swap rows and columns. + if arr.ndim == 3: + # Transpose only the spatial axes; keep the sample axis trailing. + if orientation == 5: + return np.ascontiguousarray(arr.transpose(1, 0, 2)) + if orientation == 6: + return np.ascontiguousarray(arr.transpose(1, 0, 2)[:, ::-1]) + if orientation == 7: + return np.ascontiguousarray(arr.transpose(1, 0, 2)[::-1, ::-1]) + if orientation == 8: + return np.ascontiguousarray(arr.transpose(1, 0, 2)[::-1, :]) + else: + if orientation == 5: + return np.ascontiguousarray(arr.T) + if orientation == 6: + return np.ascontiguousarray(arr.T[:, ::-1]) + if orientation == 7: + return np.ascontiguousarray(arr.T[::-1, ::-1]) + if orientation == 8: + return np.ascontiguousarray(arr.T[::-1, :]) + raise ValueError( + f"Invalid TIFF Orientation tag value: {orientation} " + f"(must be 1-8 per TIFF 6.0)" + ) + + +def _apply_orientation_with_geo( + arr: np.ndarray, geo_info: GeoInfo, orientation: int, +) -> tuple[np.ndarray, GeoInfo]: + """Apply Orientation tag to ``arr`` and update ``geo_info`` to match. + + Shared helper used by the local-file and HTTP COG paths so both + return the same pixel order and transform for a given file. See + issue #1717 for the HTTP-path parity break this consolidates. + """ + if orientation == 1: + return arr, geo_info + # Use the *file* dimensions (before orientation) for the transform + # math below. After ``_apply_orientation`` the array shape may swap + # (orientations 5-8), so capture them now. + file_h = arr.shape[0] + file_w = arr.shape[1] + arr = _apply_orientation(arr, orientation) + t = geo_info.transform + if not geo_info.has_georef: + pass + elif orientation in (2, 3, 4): + if geo_info.raster_type == RASTER_PIXEL_IS_POINT: + x_shift = file_w - 1 + y_shift = file_h - 1 + else: + x_shift = file_w + y_shift = file_h + new_origin_x = t.origin_x + new_origin_y = t.origin_y + new_px_w = t.pixel_width + new_px_h = t.pixel_height + if orientation in (2, 3): # x flipped + new_origin_x = t.origin_x + x_shift * t.pixel_width + new_px_w = -t.pixel_width + if orientation in (3, 4): # y flipped + new_origin_y = t.origin_y + y_shift * t.pixel_height + new_px_h = -t.pixel_height + geo_info.transform = GeoTransform( + origin_x=new_origin_x, + origin_y=new_origin_y, + pixel_width=new_px_w, + pixel_height=new_px_h, + ) + elif orientation in (5, 6, 7, 8): + # ``has_georef`` is True whenever ModelTransformation, + # ModelPixelScale, or ModelTiepoint is present, even without a + # CRS. The pixel-size swap below cannot express the + # per-orientation origin shift plus rotation these orientations + # require, so the x/y coords would be wrong whether or not a + # CRS tag accompanies the transform. Refuse the file in that + # case rather than warn and return silently wrong coords. + raise NotImplementedError( + f"TIFF Orientation {orientation} on a georeferenced file " + f"requires a per-orientation origin shift plus a rotation " + f"that the axis-aligned GeoTransform used here cannot " + f"represent, so the returned x/y coords would be wrong. " + f"Reproject the file with another tool (e.g. GDAL) or " + f"strip the Orientation tag before reading. See issue " + f"#1765." + ) + return arr, geo_info + + +def _apply_photometric_miniswhite(arr: np.ndarray, ifd: IFD) -> np.ndarray: + """Apply TIFF MinIsWhite inversion for single-band grayscale images.""" + if ifd.photometric != 0 or ifd.samples_per_pixel != 1: + return arr + if arr.dtype.kind == 'u': + return np.iinfo(arr.dtype).max - arr + if arr.dtype.kind == 'f': + return -arr + return arr + + +def _miniswhite_inverted_nodata(nodata, ifd: IFD, dtype: np.dtype): + """Return the nodata sentinel value after MinIsWhite inversion. + + When the reader applied MinIsWhite (``photometric == 0``, + ``samples_per_pixel == 1``), the original integer sentinel ``s`` is + rewritten to ``iinfo(dtype).max - s`` and the float sentinel ``s`` to + ``-s``. Downstream nodata-to-NaN masks must compare against the + inverted sentinel rather than the original, otherwise they flag the + wrong pixels: inverted real data colliding with the original + sentinel value is incorrectly masked while the real nodata cells + keep their inverted-sentinel value (issue #1809). + + Returns the inverted nodata sentinel, or the original ``nodata`` + when MinIsWhite was not applied / not applicable. Non-finite or + out-of-range nodata is returned unchanged so callers' downstream + skip-the-mask logic stays unchanged. + """ + if nodata is None: + return nodata + if ifd.photometric != 0 or ifd.samples_per_pixel != 1: + return nodata + if dtype.kind == 'u': + if not np.isfinite(nodata): + return nodata + if not float(nodata).is_integer(): + return nodata + vi = int(nodata) + info = np.iinfo(dtype) + if not (info.min <= vi <= info.max): + return nodata + return info.max - vi + if dtype.kind == 'f': + if np.isnan(nodata): + return nodata + return -float(nodata) + return nodata diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index 3f92268ca..05fb4c8ec 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -30,21 +30,10 @@ # "urllib3 is a hard install dep" guard. import urllib3 # noqa: F401 -from ._compression import ( - COMPRESSION_LERC, - COMPRESSION_NONE, - decompress, - fp_predictor_decode, - lerc_decompress_with_mask, - predictor_decode, - unpack_bits, -) +from ._compression import COMPRESSION_LERC from ._dtypes import SUB_BYTE_BPS, resolve_bits_per_sample, tiff_dtype_to_numpy from ._geotags import ( GeoInfo, - GeoTransform, - RASTER_PIXEL_IS_POINT, - extract_geo_info, extract_geo_info_with_overview_inheritance, ) from ._header import ( @@ -160,229 +149,31 @@ def _check_source_dimensions(width, height, samples): _validate_http_url, ) -#: Per-tile pixel count at and above which the local and HTTP tile-read paths -#: spread codec decode across a ``ThreadPoolExecutor``. Below this, pool -#: startup costs outweigh the parallelism win (issue #1551). Bound is inclusive -#: so the default ``tile_size=256`` (256*256 == 64*1024) lands on the parallel -#: path. Used by both ``_read_tiles`` and ``_fetch_decode_cog_http_tiles``. -_PARALLEL_DECODE_PIXEL_THRESHOLD = 64 * 1024 - - -def _apply_predictor(chunk: np.ndarray, pred: int, width: int, - height: int, bytes_per_sample: int, - samples: int = 1, - byte_order: str = '<') -> np.ndarray: - """Apply the appropriate predictor decode to decompressed data. - - ``width``, ``height``, ``bytes_per_sample``, and ``samples`` describe - the raw pixel layout before predictor inversion: ``width * samples`` - samples per row, each ``bytes_per_sample`` bytes wide. - - Predictor=2 (horizontal differencing) operates at the *sample* level - per TIFF Technical Note (libtiff/GDAL convention): the difference is - taken between adjacent same-component samples in the sample's - natural bit width, with stride equal to ``samples`` samples. A - byte-wise implementation drops the inter-byte carry for multi-byte - samples and produces wrong values. - - Predictor=3 (floating-point) byte-swizzles each row into - ``bytes_per_sample`` interleaved lanes of length ``width * samples``, - per TIFF Technical Note 3. The un-transpose stage has to put the - MSB lane at the file's high-order byte position, which differs for - big- vs little-endian files; ``byte_order`` carries that. - """ - if pred == 2: - return predictor_decode(chunk, width, height, - bytes_per_sample, samples=samples, - byte_order=byte_order) - elif pred == 3: - return fp_predictor_decode(chunk, width * samples, height, - bytes_per_sample, - big_endian=(byte_order == '>')) - return chunk - - -def _packed_byte_count(pixel_count: int, bps: int) -> int: - """Compute the number of packed bytes for sub-byte bit depths.""" - return (pixel_count * bps + 7) // 8 - - -def _int_nodata_in_range(nodata_int: int, dtype: np.dtype) -> bool: - """Return True iff *nodata_int* is representable as *dtype*. - - Used to gate ``dtype.type(int(...))`` casts that would otherwise raise - ``OverflowError`` on real-world files that pair an unsigned dtype with - a negative GDAL_NODATA sentinel (e.g. uint16 + ``-9999``). When the - sentinel cannot be represented, the file's pixels can never match it, - so the caller should treat the sentinel as a no-op for value matching - (still surfacing it via ``attrs['nodata']`` so write round-trips - preserve the original tag). - """ - if dtype.kind not in ('u', 'i'): - return False - info = np.iinfo(dtype) - return info.min <= nodata_int <= info.max - - -def _resolve_masked_fill(nodata_str: str | None, dtype: np.dtype): - """Resolve the value to use when restoring LERC-masked pixels. - - Mirrors :func:`_sparse_fill_value` but defaults to NaN for floating - dtypes when the file does not declare a nodata sentinel. Float - rasters with no GDAL_NODATA tag still benefit from NaN propagation - because LERC's zero fill would silently masquerade as a real - measurement at z == 0. - - Note: integer dtypes with no GDAL_NODATA tag fall back to ``0``, - which is the same value LERC zero-fills masked pixels with -- in - that case the mask application is intentionally a no-op. We avoid - inventing an integer sentinel (e.g. iinfo.max) because doing so - would silently change pixel values for files that never declared - one, breaking downstream consumers that key off the original data. - - Out-of-range integer sentinels (e.g. ``uint16`` paired with - ``GDAL_NODATA="-9999"``, common on legacy GDAL files) cannot be - represented in the file dtype and so cannot match any decoded - pixel; we fall back to ``0`` rather than raising ``OverflowError`` - on the dtype cast. - """ - if nodata_str is not None: - # Try ``int`` first so 64-bit sentinels survive without the - # float64 round-trip; fall back to ``float`` for NaN / Inf / - # scientific notation / fractional values. See issue #1847. - from ._geotags import _parse_nodata_str as _parse_nd - parsed = _parse_nd(nodata_str) - if parsed is not None: - if dtype.kind == 'f': - return dtype.type(parsed) - if isinstance(parsed, int): - if _int_nodata_in_range(parsed, dtype): - return dtype.type(parsed) - elif not math.isnan(parsed) and not math.isinf(parsed): - if float(parsed).is_integer(): - nodata_int = int(parsed) - if _int_nodata_in_range(nodata_int, dtype): - return dtype.type(nodata_int) - if dtype.kind == 'f': - return dtype.type(np.nan) - return dtype.type(0) - - -def _decode_strip_or_tile(data_slice, compression, width, height, samples, - bps, bytes_per_sample, is_sub_byte, dtype, pred, - byte_order='<', jpeg_tables=None, - masked_fill=None): - """Decompress, apply predictor, unpack sub-byte, and reshape a strip/tile. - - Parameters - ---------- - byte_order : str - '<' for little-endian, '>' for big-endian. When the file byte - order differs from the system's native order, pixel data is - byte-swapped after decompression. - jpeg_tables : bytes or None - Raw bytes of the file's JPEGTables tag (347), or None if the file - doesn't have one. GDAL-style tiled JPEG TIFFs store DQT/DHT tables - once in this tag and each tile is a JPEG fragment that depends on - them; the JPEG decoder splices the tables in before handing the - tile to libjpeg. Ignored for non-JPEG compressions. - masked_fill : scalar or None - Fill value written into pixels that the LERC valid-mask flags as - invalid. Only consulted for ``compression == COMPRESSION_LERC`` - when the decoder returns a non-trivial mask; ignored for every - other codec. Callers should compute it once per IFD via - :func:`_resolve_masked_fill` (typically NaN for float dtypes or - the parsed ``GDAL_NODATA`` sentinel). When ``None``, masked - pixels are left at LERC's zero fill. - - Returns an array shaped (height, width) or (height, width, samples). - """ - pixel_count = width * height * samples - if is_sub_byte: - expected = _packed_byte_count(pixel_count, bps) - else: - expected = pixel_count * bytes_per_sample - - lerc_mask = None - if compression == COMPRESSION_LERC: - # LERC needs special handling: lerc.decode also returns a - # valid-mask which the generic decompress() dispatcher discards. - # We capture it here so masked pixels can be restored to nodata - # below, instead of leaking LERC's zero fill into the output. - # Forward ``expected`` so the wrapper rejects bombs at the - # blob-header level rather than after the full buffer is - # materialised (issue #1625). - decoded_bytes, lerc_mask = lerc_decompress_with_mask( - data_slice, expected_size=expected) - chunk = np.frombuffer(decoded_bytes, dtype=np.uint8) - else: - chunk = decompress(data_slice, compression, expected, - width=width, height=height, samples=samples, - jpeg_tables=jpeg_tables) - - # Validate the decompressed byte count. A truncated deflate stream or a - # buggy compressor can produce fewer or more bytes than expected. Without - # this check the downstream reshape raises an opaque "cannot reshape array - # of size N into shape (h, w)" that hides which tile/strip broke. Edge - # tiles in a valid TIFF still decompress to the full tile_height x - # tile_width (the caller slices the top-left region), so this only fires - # on genuine corruption. - if chunk.size != expected: - raise ValueError( - f"Decompressed tile/strip size mismatch: expected {expected} " - f"bytes for a {width} x {height} x {samples} block " - f"(bps={bps}, compression={compression}), got {chunk.size}. " - f"The TIFF data is likely truncated or corrupt." - ) - - if pred in (2, 3) and not is_sub_byte: - if not chunk.flags.writeable: - chunk = chunk.copy() - chunk = _apply_predictor(chunk, pred, width, height, - bytes_per_sample, samples=samples, - byte_order=byte_order) - - if is_sub_byte: - pixels = unpack_bits(chunk, bps, pixel_count) - else: - # Use the file's byte order for the view, then convert to native. - # The view dtype must match the on-disk sample width: float16 - # files (bps=16 + SampleFormat=3) are auto-promoted to float32 - # for the user-visible array, but the raw bytes have to be - # viewed as float16 first then cast (#1941). Detect the - # promotion via the bps-vs-dtype.itemsize mismatch so the - # surrounding pipeline stays unchanged for byte-equal cases. - if dtype.itemsize * 8 != bps and bps == 16 and dtype.kind == 'f': - storage_dtype = np.dtype('float16').newbyteorder(byte_order) - pixels = chunk.view(storage_dtype).astype(dtype) - else: - file_dtype = dtype.newbyteorder(byte_order) - pixels = chunk.view(file_dtype) - if file_dtype.byteorder not in ('=', '|', _NATIVE_ORDER): - pixels = pixels.astype(dtype) - - if samples > 1: - out = pixels.reshape(height, width, samples) - else: - out = pixels.reshape(height, width) - - # Restore nodata in positions LERC flagged as invalid. LERC - # zero-fills masked pixels in the data array, which would otherwise - # be indistinguishable from real zero readings downstream. - if lerc_mask is not None and masked_fill is not None: - mask_arr = np.asarray(lerc_mask) - if mask_arr.ndim == 2 and out.ndim == 3: - mask_arr = mask_arr[..., None] - invalid = np.broadcast_to(mask_arr == 0, out.shape) - if invalid.any(): - if not out.flags.writeable: - out = out.copy() - np.putmask(out, invalid, masked_fill) - return out - - -import sys as _sys -_NATIVE_ORDER = '<' if _sys.byteorder == 'little' else '>' +# Strip/tile decode orchestration lives in ``_decode``. It is imported +# back here so that: +# * existing call sites inside this module (``_read_strips``, +# ``_read_tiles``, ``_decode_strip_or_tile``, the photometric and +# orientation helpers) keep their bare names, and +# * the historical public import surface +# (``from xrspatial.geotiff._reader import _read_strips`` and +# friends, used by VRT / GPU / dask backends, the writer, and the +# test suite) is preserved without churn. +# Source: PR-G of the GeoTIFF refactor epic, issue #2246. +from ._decode import ( # noqa: F401 + _NATIVE_ORDER, + _PARALLEL_DECODE_PIXEL_THRESHOLD, + _apply_orientation, + _apply_orientation_with_geo, + _apply_photometric_miniswhite, + _apply_predictor, + _decode_strip_or_tile, + _int_nodata_in_range, + _miniswhite_inverted_nodata, + _packed_byte_count, + _read_strips, + _read_tiles, + _resolve_masked_fill, +) def _sparse_fill_value(ifd: IFD, dtype: np.dtype): @@ -468,421 +259,6 @@ def _compute_full_image_byte_budget(offsets, byte_counts) -> int: return max_end + _FULL_IMAGE_BUDGET_HEADER_SLACK -# --------------------------------------------------------------------------- -# Strip reader -# --------------------------------------------------------------------------- - -def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader, - dtype: np.dtype, window=None, - max_pixels: int = MAX_PIXELS_DEFAULT) -> np.ndarray: - """Read a strip-organized TIFF image. - - Parameters - ---------- - data : bytes - Full file data. - ifd : IFD - Parsed IFD for this image. - header : TIFFHeader - File header. - dtype : np.dtype - Output pixel dtype. - window : tuple or None - (row_start, col_start, row_stop, col_stop) or None for full image. - max_pixels : int - Maximum allowed pixel count (width * height * samples). - - Returns - ------- - np.ndarray with shape (height, width) or windowed subset. - """ - width = ifd.width - height = ifd.height - samples = ifd.samples_per_pixel - # Source-IFD dim check (issue #2053). The tiled path is already - # covered by ``validate_tile_layout``; this is its stripped-path - # parity. Run before any window clamping so a malformed - # ``ImageWidth=0`` IFD fails at the source rather than collapsing - # to an empty post-clamp window. - _check_source_dimensions(width, height, samples) - compression = ifd.compression - rps = ifd.rows_per_strip - offsets = ifd.strip_offsets - byte_counts = ifd.strip_byte_counts - pred = ifd.predictor - _validate_predictor_sample_format(pred, ifd.sample_format) - bps = resolve_bits_per_sample(ifd.bits_per_sample) - bytes_per_sample = bps // 8 - is_sub_byte = bps in SUB_BYTE_BPS - jpeg_tables = ifd.jpeg_tables - masked_fill = (_resolve_masked_fill(ifd.nodata_str, dtype) - if compression == COMPRESSION_LERC else None) - - if offsets is None or byte_counts is None: - raise ValueError("Missing strip offsets or byte counts") - - # Per-strip compressed-byte cap (issue #1664). Mirrors the HTTP path: - # a crafted ``StripByteCounts`` can declare a huge value and even - # though mmap slicing on the local path is bounded by the file size, - # the slice is still passed into the decompressor which can expand - # a few KiB of crafted deflate/zstd into gigabytes of decoded output. - # Override via ``XRSPATIAL_COG_MAX_TILE_BYTES`` (the env var is shared - # with the tile path because the budget is the same). - max_tile_bytes = _max_tile_bytes_from_env() - for _strip_idx, _bc in enumerate(byte_counts): - if _bc > max_tile_bytes: - raise ValueError( - f"TIFF strip {_strip_idx} declares " - f"StripByteCount={_bc:,} bytes, which exceeds the " - f"per-strip safety cap of {max_tile_bytes:,} bytes. " - f"The file is malformed or attempting denial-of-service. " - f"Override via XRSPATIAL_COG_MAX_TILE_BYTES if this file " - f"is legitimate." - ) - - # A corrupt header can report RowsPerStrip=0, which would divide by zero - # below. Reject it as a typed parse error rather than letting the - # ZeroDivisionError leak out to the caller. - if rps is None or rps <= 0: - raise ValueError(f"Invalid RowsPerStrip: {rps!r}") - - planar = ifd.planar_config # 1=chunky (interleaved), 2=planar (separate) - - # Determine output region - if window is not None: - r0, c0, r1, c1 = window - r0 = max(0, r0) - c0 = max(0, c0) - r1 = min(height, r1) - c1 = min(width, c1) - else: - r0, c0, r1, c1 = 0, 0, height, width - - out_h = r1 - r0 - out_w = c1 - c0 - - _check_dimensions(out_w, out_h, samples, max_pixels) - - # StripByteCounts must have at least one entry per strip; a corrupt count - # field can shrink it. Detect the mismatch after the dimension safety - # check so an oversized header raises the safety-limit error first, then - # raise a typed ValueError here instead of IndexError when the loop - # indexes past the end. - # - # For PlanarConfiguration=2 (separate / planar) each sample plane has its - # own run of strips, so the table must hold strips_per_band * samples - # entries. PlanarConfiguration=1 (chunky) interleaves samples within a - # single run of strips_per_band entries. - strips_per_band = (height + rps - 1) // rps - if planar == 2 and samples > 1: - n_strips_expected = strips_per_band * samples - if len(offsets) < n_strips_expected or len(byte_counts) < n_strips_expected: - raise ValueError( - f"Strip table truncated for planar layout " - f"(PlanarConfiguration=2): expected " - f"{n_strips_expected} entries " - f"({strips_per_band} strips x {samples} samples), got " - f"offsets={len(offsets)}, byte_counts={len(byte_counts)}") - else: - n_strips_expected = strips_per_band - if len(offsets) < n_strips_expected or len(byte_counts) < n_strips_expected: - raise ValueError( - f"Strip table truncated: expected {n_strips_expected} entries, " - f"got offsets={len(offsets)}, byte_counts={len(byte_counts)}") - - # Sparse strips (StripByteCounts == 0) must materialise as nodata or 0 - # rather than be decoded. Pre-fill the result so any skipped strips - # land on a known fill value. - sparse = _has_sparse(byte_counts) - if sparse: - fill = _sparse_fill_value(ifd, dtype) - if samples > 1: - result = np.full((out_h, out_w, samples), fill, dtype=dtype) - else: - result = np.full((out_h, out_w), fill, dtype=dtype) - elif samples > 1: - result = np.empty((out_h, out_w, samples), dtype=dtype) - else: - result = np.empty((out_h, out_w), dtype=dtype) - - # Collect strip jobs; decode in parallel when the pool overhead pays off. - # Mirrors the tile path's gate at ``_PARALLEL_DECODE_PIXEL_THRESHOLD``: - # codec decode (deflate, zstd, LZW) releases the GIL inside the C - # extension so threads actually overlap codec work across cores. The - # placement loop that copies pixels into ``result`` stays serial to - # avoid contending writes to the output buffer. See issue #2100. - strip_jobs: list[tuple[int, int, int]] = [] # (band_idx, strip_idx, global_idx) - if planar == 2 and samples > 1: - first_strip = r0 // rps - last_strip = min((r1 - 1) // rps, strips_per_band - 1) - for band_idx in range(samples): - band_offset = band_idx * strips_per_band - for strip_idx in range(first_strip, last_strip + 1): - global_idx = band_offset + strip_idx - if byte_counts[global_idx] == 0: - continue - strip_row = strip_idx * rps - strip_rows = min(rps, height - strip_row) - if strip_rows <= 0: - continue - strip_jobs.append((band_idx, strip_idx, global_idx)) - strip_samples = 1 - else: - first_strip = r0 // rps - last_strip = min((r1 - 1) // rps, len(offsets) - 1) - for strip_idx in range(first_strip, last_strip + 1): - strip_row = strip_idx * rps - strip_rows = min(rps, height - strip_row) - if strip_rows <= 0: - continue - if byte_counts[strip_idx] == 0: - continue - strip_jobs.append((0, strip_idx, strip_idx)) - strip_samples = samples - - def _decode_strip_job(job): - _band_idx, strip_idx, global_idx = job - strip_row = strip_idx * rps - strip_rows = min(rps, height - strip_row) - strip_data = data[ - offsets[global_idx]:offsets[global_idx] + byte_counts[global_idx]] - return _decode_strip_or_tile( - strip_data, compression, width, strip_rows, strip_samples, - bps, bytes_per_sample, is_sub_byte, dtype, pred, - byte_order=header.byte_order, - jpeg_tables=jpeg_tables, - masked_fill=masked_fill) - - n_strips = len(strip_jobs) - strip_pixel_count = width * rps - use_parallel = (n_strips > 1 - and strip_pixel_count >= _PARALLEL_DECODE_PIXEL_THRESHOLD) - if use_parallel: - n_workers = min(n_strips, _os_module.cpu_count() or 4) - with ThreadPoolExecutor(max_workers=n_workers) as pool: - decoded_strips = list(pool.map(_decode_strip_job, strip_jobs)) - else: - decoded_strips = [_decode_strip_job(job) for job in strip_jobs] - - for (band_idx, strip_idx, _global_idx), strip_pixels in zip( - strip_jobs, decoded_strips): - strip_row = strip_idx * rps - strip_rows = min(rps, height - strip_row) - src_r0 = max(r0 - strip_row, 0) - src_r1 = min(r1 - strip_row, strip_rows) - dst_r0 = max(strip_row - r0, 0) - dst_r1 = dst_r0 + (src_r1 - src_r0) - if dst_r1 > dst_r0: - if planar == 2 and samples > 1: - result[dst_r0:dst_r1, :, band_idx] = ( - strip_pixels[src_r0:src_r1, c0:c1]) - else: - result[dst_r0:dst_r1] = strip_pixels[src_r0:src_r1, c0:c1] - - return result - - -# --------------------------------------------------------------------------- -# Tile reader -# --------------------------------------------------------------------------- - -def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader, - dtype: np.dtype, window=None, - max_pixels: int = MAX_PIXELS_DEFAULT) -> np.ndarray: - """Read a tile-organized TIFF image. - - Parameters - ---------- - data : bytes - Full file data. - ifd : IFD - Parsed IFD for this image. - header : TIFFHeader - File header. - dtype : np.dtype - Output pixel dtype. - window : tuple or None - (row_start, col_start, row_stop, col_stop) or None for full image. - max_pixels : int - Maximum allowed pixel count (width * height * samples). - - Returns - ------- - np.ndarray with shape (height, width) or windowed subset. - """ - width = ifd.width - height = ifd.height - tw = ifd.tile_width - th = ifd.tile_height - samples = ifd.samples_per_pixel - compression = ifd.compression - pred = ifd.predictor - _validate_predictor_sample_format(pred, ifd.sample_format) - bps = resolve_bits_per_sample(ifd.bits_per_sample) - bytes_per_sample = bps // 8 - is_sub_byte = bps in SUB_BYTE_BPS - jpeg_tables = ifd.jpeg_tables - masked_fill = (_resolve_masked_fill(ifd.nodata_str, dtype) - if compression == COMPRESSION_LERC else None) - - offsets = ifd.tile_offsets - byte_counts = ifd.tile_byte_counts - if offsets is None or byte_counts is None: - raise ValueError("Missing tile offsets or byte counts") - - if tw <= 0 or th <= 0: - raise ValueError( - f"Invalid tile dimensions: TileWidth={tw}, TileLength={th}") - - # Reject crafted tile dims (e.g. TileWidth = 2**31). This guards the - # TIFF header against malformed values; it is not the caller's output - # budget. The output-window check below uses ``max_pixels`` and is - # what enforces the user's per-call memory cap. The source-read path - # under ``read_vrt`` (#1796) relies on that output check to honour a - # small caller ``max_pixels`` against a normal-tile source; see - # #1823. - _check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT) - - # Per-tile compressed-byte cap (issue #1664). Same env var as the - # HTTP path. mmap slicing is bounded by the file size, but the slice - # gets handed to the decompressor, and a small slice can balloon - # into gigabytes through deflate / zstd / lzw / lerc. - max_tile_bytes = _max_tile_bytes_from_env() - for _tile_idx, _bc in enumerate(byte_counts): - if _bc > max_tile_bytes: - raise ValueError( - f"TIFF tile {_tile_idx} declares " - f"TileByteCount={_bc:,} bytes, which exceeds the " - f"per-tile safety cap of {max_tile_bytes:,} bytes. " - f"The file is malformed or attempting denial-of-service. " - f"Override via XRSPATIAL_COG_MAX_TILE_BYTES if this file " - f"is legitimate." - ) - - planar = ifd.planar_config - tiles_across = math.ceil(width / tw) - tiles_down = math.ceil(height / th) - - if window is not None: - r0, c0, r1, c1 = window - r0 = max(0, r0) - c0 = max(0, c0) - r1 = min(height, r1) - c1 = min(width, c1) - else: - r0, c0, r1, c1 = 0, 0, height, width - - out_h = r1 - r0 - out_w = c1 - c0 - - _check_dimensions(out_w, out_h, samples, max_pixels) - - # Reject malformed TIFFs whose declared tile grid exceeds the number of - # supplied TileOffsets entries. Silent skipping in the CPU loop below - # would mask the problem, and the GPU path reads OOB. See issue #1219. - validate_tile_layout(ifd) - - # Sparse tiles (TileByteCounts == 0) must materialise as nodata or 0 - # rather than be decoded. Pre-fill the result so any skipped tiles - # land on a known fill value; otherwise sparse regions would leak - # uninitialised memory (full-image read) or stay zeroed regardless - # of the file's nodata setting (windowed read). - sparse = _has_sparse(byte_counts) - if sparse: - fill = _sparse_fill_value(ifd, dtype) - if samples > 1: - result = np.full((out_h, out_w, samples), fill, dtype=dtype) - else: - result = np.full((out_h, out_w), fill, dtype=dtype) - else: - _alloc = np.zeros if window is not None else np.empty - if samples > 1: - result = _alloc((out_h, out_w, samples), dtype=dtype) - else: - result = _alloc((out_h, out_w), dtype=dtype) - - tile_row_start = r0 // th - tile_row_end = min(math.ceil(r1 / th), tiles_down) - tile_col_start = c0 // tw - tile_col_end = min(math.ceil(c1 / tw), tiles_across) - - band_count = samples if (planar == 2 and samples > 1) else 1 - tiles_per_band = tiles_across * tiles_down - - # Build list of tiles to decode. Sparse tiles (byte_count==0) are - # skipped here -- the result is pre-filled with the sparse fill value. - tile_jobs = [] - for band_idx in range(band_count): - band_tile_offset = band_idx * tiles_per_band if band_count > 1 else 0 - tile_samples = 1 if band_count > 1 else samples - - for tr in range(tile_row_start, tile_row_end): - for tc in range(tile_col_start, tile_col_end): - tile_idx = band_tile_offset + tr * tiles_across + tc - if tile_idx >= len(offsets): - continue - if byte_counts[tile_idx] == 0: - continue - tile_jobs.append((band_idx, tr, tc, tile_idx, tile_samples)) - - # Decode tiles in parallel when the work per tile is large enough to - # outweigh the thread-pool overhead. Uncompressed multi-tile reads also - # benefit because numpy frombuffer + slice copies aren't free at large - # tile sizes. Threshold is shared with the HTTP COG path below - # (issue #1551). - n_tiles = len(tile_jobs) - tile_pixels = tw * th - use_parallel = (n_tiles > 1 and tile_pixels >= _PARALLEL_DECODE_PIXEL_THRESHOLD) - - def _decode_one(job): - band_idx, tr, tc, tile_idx, tile_samples = job - tile_data = data[offsets[tile_idx]:offsets[tile_idx] + byte_counts[tile_idx]] - return _decode_strip_or_tile( - tile_data, compression, tw, th, tile_samples, - bps, bytes_per_sample, is_sub_byte, dtype, pred, - byte_order=header.byte_order, - jpeg_tables=jpeg_tables, - masked_fill=masked_fill) - - if use_parallel: - from concurrent.futures import ThreadPoolExecutor - import os as _os - n_workers = min(n_tiles, _os.cpu_count() or 4) - with ThreadPoolExecutor(max_workers=n_workers) as pool: - decoded = list(pool.map(_decode_one, tile_jobs)) - else: - decoded = [_decode_one(job) for job in tile_jobs] - - # Place decoded tiles into the output array - for (band_idx, tr, tc, tile_idx, tile_samples), tile_pixels in zip(tile_jobs, decoded): - tile_r0 = tr * th - tile_c0 = tc * tw - - src_r0 = max(r0 - tile_r0, 0) - src_c0 = max(c0 - tile_c0, 0) - src_r1 = min(r1 - tile_r0, th) - src_c1 = min(c1 - tile_c0, tw) - - dst_r0 = max(tile_r0 - r0, 0) - dst_c0 = max(tile_c0 - c0, 0) - - actual_tile_h = min(th, height - tile_r0) - actual_tile_w = min(tw, width - tile_c0) - src_r1 = min(src_r1, actual_tile_h) - src_c1 = min(src_c1, actual_tile_w) - dst_r1 = dst_r0 + (src_r1 - src_r0) - dst_c1 = dst_c0 + (src_c1 - src_c0) - - if dst_r1 > dst_r0 and dst_c1 > dst_c0: - src_slice = tile_pixels[src_r0:src_r1, src_c0:src_c1] - if band_count > 1: - result[dst_r0:dst_r1, dst_c0:dst_c1, band_idx] = src_slice - else: - result[dst_r0:dst_r1, dst_c0:dst_c1] = src_slice - - return result - - # --------------------------------------------------------------------------- # COG HTTP reader # --------------------------------------------------------------------------- @@ -1778,177 +1154,6 @@ def _decode_one(tile_data): # Main read function # --------------------------------------------------------------------------- -def _apply_orientation(arr: np.ndarray, orientation: int) -> np.ndarray: - """Reorient a decoded TIFF array according to the Orientation tag (274). - - The TIFF 6.0 spec defines eight orientations describing where the - *first row* and *first column* of the stored data sit relative to the - visual top-left of the image: - - === ================= ======================================== - 1 top-left identity (default, no transform) - 2 top-right mirror horizontally (flip columns) - 3 bottom-right rotate 180 degrees - 4 bottom-left mirror vertically (flip rows) - 5 left-top transpose (rows<->columns) - 6 right-top rotate 90 clockwise - 7 right-bottom transverse (anti-transpose) - 8 left-bottom rotate 90 counter-clockwise - === ================= ======================================== - - Values 5-8 swap rows and columns: the file's stored width becomes the - output's height and vice versa. - - The input ``arr`` is shaped ``(height, width)`` or - ``(height, width, samples)``. Multi-band 3D arrays only have their - first two axes transformed; the sample axis is preserved. - """ - if orientation == 1: - return arr - if orientation == 2: - return np.ascontiguousarray(arr[:, ::-1]) - if orientation == 3: - return np.ascontiguousarray(arr[::-1, ::-1]) - if orientation == 4: - return np.ascontiguousarray(arr[::-1, :]) - # Orientations 5-8 swap rows and columns. - if arr.ndim == 3: - # Transpose only the spatial axes; keep the sample axis trailing. - if orientation == 5: - return np.ascontiguousarray(arr.transpose(1, 0, 2)) - if orientation == 6: - return np.ascontiguousarray(arr.transpose(1, 0, 2)[:, ::-1]) - if orientation == 7: - return np.ascontiguousarray(arr.transpose(1, 0, 2)[::-1, ::-1]) - if orientation == 8: - return np.ascontiguousarray(arr.transpose(1, 0, 2)[::-1, :]) - else: - if orientation == 5: - return np.ascontiguousarray(arr.T) - if orientation == 6: - return np.ascontiguousarray(arr.T[:, ::-1]) - if orientation == 7: - return np.ascontiguousarray(arr.T[::-1, ::-1]) - if orientation == 8: - return np.ascontiguousarray(arr.T[::-1, :]) - raise ValueError( - f"Invalid TIFF Orientation tag value: {orientation} " - f"(must be 1-8 per TIFF 6.0)" - ) - - -def _apply_orientation_with_geo( - arr: np.ndarray, geo_info: GeoInfo, orientation: int, -) -> tuple[np.ndarray, GeoInfo]: - """Apply Orientation tag to ``arr`` and update ``geo_info`` to match. - - Shared helper used by the local-file and HTTP COG paths so both - return the same pixel order and transform for a given file. See - issue #1717 for the HTTP-path parity break this consolidates. - """ - if orientation == 1: - return arr, geo_info - # Use the *file* dimensions (before orientation) for the transform - # math below. After ``_apply_orientation`` the array shape may swap - # (orientations 5-8), so capture them now. - file_h = arr.shape[0] - file_w = arr.shape[1] - arr = _apply_orientation(arr, orientation) - t = geo_info.transform - if not geo_info.has_georef: - pass - elif orientation in (2, 3, 4): - if geo_info.raster_type == RASTER_PIXEL_IS_POINT: - x_shift = file_w - 1 - y_shift = file_h - 1 - else: - x_shift = file_w - y_shift = file_h - new_origin_x = t.origin_x - new_origin_y = t.origin_y - new_px_w = t.pixel_width - new_px_h = t.pixel_height - if orientation in (2, 3): # x flipped - new_origin_x = t.origin_x + x_shift * t.pixel_width - new_px_w = -t.pixel_width - if orientation in (3, 4): # y flipped - new_origin_y = t.origin_y + y_shift * t.pixel_height - new_px_h = -t.pixel_height - geo_info.transform = GeoTransform( - origin_x=new_origin_x, - origin_y=new_origin_y, - pixel_width=new_px_w, - pixel_height=new_px_h, - ) - elif orientation in (5, 6, 7, 8): - # ``has_georef`` is True whenever ModelTransformation, - # ModelPixelScale, or ModelTiepoint is present, even without a - # CRS. The pixel-size swap below cannot express the - # per-orientation origin shift plus rotation these orientations - # require, so the x/y coords would be wrong whether or not a - # CRS tag accompanies the transform. Refuse the file in that - # case rather than warn and return silently wrong coords. - raise NotImplementedError( - f"TIFF Orientation {orientation} on a georeferenced file " - f"requires a per-orientation origin shift plus a rotation " - f"that the axis-aligned GeoTransform used here cannot " - f"represent, so the returned x/y coords would be wrong. " - f"Reproject the file with another tool (e.g. GDAL) or " - f"strip the Orientation tag before reading. See issue " - f"#1765." - ) - return arr, geo_info - - -def _apply_photometric_miniswhite(arr: np.ndarray, ifd: IFD) -> np.ndarray: - """Apply TIFF MinIsWhite inversion for single-band grayscale images.""" - if ifd.photometric != 0 or ifd.samples_per_pixel != 1: - return arr - if arr.dtype.kind == 'u': - return np.iinfo(arr.dtype).max - arr - if arr.dtype.kind == 'f': - return -arr - return arr - - -def _miniswhite_inverted_nodata(nodata, ifd: IFD, dtype: np.dtype): - """Return the nodata sentinel value after MinIsWhite inversion. - - When the reader applied MinIsWhite (``photometric == 0``, - ``samples_per_pixel == 1``), the original integer sentinel ``s`` is - rewritten to ``iinfo(dtype).max - s`` and the float sentinel ``s`` to - ``-s``. Downstream nodata-to-NaN masks must compare against the - inverted sentinel rather than the original, otherwise they flag the - wrong pixels: inverted real data colliding with the original - sentinel value is incorrectly masked while the real nodata cells - keep their inverted-sentinel value (issue #1809). - - Returns the inverted nodata sentinel, or the original ``nodata`` - when MinIsWhite was not applied / not applicable. Non-finite or - out-of-range nodata is returned unchanged so callers' downstream - skip-the-mask logic stays unchanged. - """ - if nodata is None: - return nodata - if ifd.photometric != 0 or ifd.samples_per_pixel != 1: - return nodata - if dtype.kind == 'u': - if not np.isfinite(nodata): - return nodata - if not float(nodata).is_integer(): - return nodata - vi = int(nodata) - info = np.iinfo(dtype) - if not (info.min <= vi <= info.max): - return nodata - return info.max - vi - if dtype.kind == 'f': - if np.isnan(nodata): - return nodata - return -float(nodata) - return nodata - - def _read_to_array(source, *, window=None, overview_level: int | None = None, band: int | None = None, max_pixels: int = MAX_PIXELS_DEFAULT, diff --git a/xrspatial/geotiff/tests/test_parallel_strip_decode_2100.py b/xrspatial/geotiff/tests/test_parallel_strip_decode_2100.py index 4b6c4758e..20f38b36c 100644 --- a/xrspatial/geotiff/tests/test_parallel_strip_decode_2100.py +++ b/xrspatial/geotiff/tests/test_parallel_strip_decode_2100.py @@ -8,6 +8,7 @@ """ from __future__ import annotations +import concurrent.futures import http.server import os import socket @@ -77,8 +78,13 @@ def test_parallel_pool_engages_on_multi_strip(self, tmp_path): p = str(tmp_path / "s.tif") with open(p, "wb") as f: f.write(blob) - with patch.object(_reader_mod, "ThreadPoolExecutor", - wraps=_reader_mod.ThreadPoolExecutor) as mock_pool: + # Patch ``concurrent.futures.ThreadPoolExecutor`` rather than the + # reader module binding because the strip decode lives in + # ``_decode`` and re-imports the executor function-locally (PR-G, + # issue #2246). + with patch.object(concurrent.futures, "ThreadPoolExecutor", + wraps=concurrent.futures.ThreadPoolExecutor + ) as mock_pool: out, _ = read_to_array(p) # n_strips for a 1024-row file with default rps -> at # least 4 strips (TIFFs default rps=8KB / row). @@ -92,8 +98,9 @@ def test_serial_path_used_for_small_strip(self, tmp_path): da = xr.DataArray(arr, dims=["y", "x"]) p = str(tmp_path / "tiny.tif") to_geotiff(da, p, compression="deflate", tiled=False) - with patch.object(_reader_mod, "ThreadPoolExecutor", - wraps=_reader_mod.ThreadPoolExecutor) as mock_pool: + with patch.object(concurrent.futures, "ThreadPoolExecutor", + wraps=concurrent.futures.ThreadPoolExecutor + ) as mock_pool: out, _ = read_to_array(p) # Single-strip file => no pool. assert not mock_pool.called diff --git a/xrspatial/geotiff/tests/test_parallel_strip_decode_sparse_2100.py b/xrspatial/geotiff/tests/test_parallel_strip_decode_sparse_2100.py index c8b0751f9..363be0892 100644 --- a/xrspatial/geotiff/tests/test_parallel_strip_decode_sparse_2100.py +++ b/xrspatial/geotiff/tests/test_parallel_strip_decode_sparse_2100.py @@ -34,6 +34,7 @@ """ from __future__ import annotations +import concurrent.futures import http.server import os import socket @@ -184,9 +185,12 @@ def test_parallel_pool_engages_on_sparse_multi_strip(self, tmp_path): # engages because n_strips = 4 > 1 and strip_pixel_count # = 2048 * 64 = 131_072 >= 65_536. _write_sparse_stripped_large(path, filled_rows=256) + # Patch ``concurrent.futures.ThreadPoolExecutor`` rather than the + # reader module binding: strip decode lives in ``_decode`` after + # PR-G (issue #2246) and re-imports the executor function-locally. with patch.object( - _reader_mod, "ThreadPoolExecutor", - wraps=_reader_mod.ThreadPoolExecutor) as mock_pool: + concurrent.futures, "ThreadPoolExecutor", + wraps=concurrent.futures.ThreadPoolExecutor) as mock_pool: out, _ = read_to_array(path) assert mock_pool.called, ( "parallel-decode pool was not engaged for a multi-strip " @@ -229,8 +233,8 @@ def test_all_sparse_image_returns_fill(self, tmp_path): path = str(tmp_path / "all_sparse.tif") _write_sparse_stripped_large(path, filled_rows=0) with patch.object( - _reader_mod, "ThreadPoolExecutor", - wraps=_reader_mod.ThreadPoolExecutor) as mock_pool: + concurrent.futures, "ThreadPoolExecutor", + wraps=concurrent.futures.ThreadPoolExecutor) as mock_pool: out, _ = read_to_array(path) # All strips sparse → no jobs → no pool. assert not mock_pool.called, ( From 9e2906e66e246265c9c839f411355dc91d1a96fb Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 21 May 2026 07:32:31 -0700 Subject: [PATCH 2/3] Address review feedback on _decode extraction (#2246) - Docstring: spell out that _decode.py deliberately has no top-level _reader import so the two modules can sit on either side of the circular relationship. - Restore the ``max_pixels: int`` annotation on _read_strips and _read_tiles. _MAX_PIXELS_UNSET is still the runtime default so the lookup of MAX_PIXELS_DEFAULT can stay lazy. - Cross-reference issue #1551 in the strip path's ThreadPoolExecutor comment so it matches the tile path note. - Update local-path _PARALLEL_DECODE_PIXEL_THRESHOLD patches in the parallel-strip tests to target ``_decode`` instead of ``_reader``. After PR-G the live binding _read_strips reads is in _decode; the back-imported name in _reader is a separate reference, so patching _reader alone silently no-ops the "force serial" half of the parallel-vs-serial parity tests. HTTP-path patches stay on _reader because _fetch_decode_cog_http_strips still lives there and uses the back-imported binding. --- xrspatial/geotiff/_decode.py | 17 ++++++++++++++--- .../tests/test_parallel_strip_decode_2100.py | 12 ++++++++---- .../test_parallel_strip_decode_sparse_2100.py | 11 ++++++++--- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/xrspatial/geotiff/_decode.py b/xrspatial/geotiff/_decode.py index 87e3f2ada..363c655ce 100644 --- a/xrspatial/geotiff/_decode.py +++ b/xrspatial/geotiff/_decode.py @@ -13,6 +13,16 @@ by re-importing the names defined here back at the call sites that the test suite and downstream backends depend on. See the import block at the top of ``_reader.py``. + +This module deliberately has *no* module-level import of +:mod:`xrspatial.geotiff._reader` so the two files can sit on either side +of a circular relationship: ``_reader.py`` imports the decode functions +back at module load, and the few things ``_decode`` still needs from +``_reader`` (``MAX_PIXELS_DEFAULT``, ``_check_dimensions``, +``_check_source_dimensions``, ``_sparse_fill_value``, ``_has_sparse``) +are imported lazily inside ``_read_strips`` / ``_read_tiles`` at call +time. Those names move with ``_layout.py`` in PR-H (issue #2247), at +which point the lazy imports can collapse back into top-level ones. """ from __future__ import annotations @@ -287,7 +297,7 @@ def _decode_strip_or_tile(data_slice, compression, width, height, samples, def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader, dtype: np.dtype, window=None, - max_pixels=_MAX_PIXELS_UNSET) -> np.ndarray: + max_pixels: int = _MAX_PIXELS_UNSET) -> np.ndarray: # type: ignore[assignment] """Read a strip-organized TIFF image. Parameters @@ -486,7 +496,8 @@ def _decode_strip_job(job): if use_parallel: # Function-local import (rather than the module-level binding) # so tests that monkey-patch ``concurrent.futures.ThreadPoolExecutor`` - # see the spy class here. The tile path uses the same pattern. + # see the spy class here. The tile path uses the same pattern; + # both gates share ``_PARALLEL_DECODE_PIXEL_THRESHOLD`` (#1551). from concurrent.futures import ThreadPoolExecutor n_workers = min(n_strips, _os_module.cpu_count() or 4) with ThreadPoolExecutor(max_workers=n_workers) as pool: @@ -518,7 +529,7 @@ def _decode_strip_job(job): def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader, dtype: np.dtype, window=None, - max_pixels=_MAX_PIXELS_UNSET) -> np.ndarray: + max_pixels: int = _MAX_PIXELS_UNSET) -> np.ndarray: # type: ignore[assignment] """Read a tile-organized TIFF image. Parameters diff --git a/xrspatial/geotiff/tests/test_parallel_strip_decode_2100.py b/xrspatial/geotiff/tests/test_parallel_strip_decode_2100.py index 20f38b36c..b43f64d6f 100644 --- a/xrspatial/geotiff/tests/test_parallel_strip_decode_2100.py +++ b/xrspatial/geotiff/tests/test_parallel_strip_decode_2100.py @@ -22,6 +22,7 @@ from xrspatial.geotiff import to_geotiff from xrspatial.geotiff import _reader as _reader_mod +from xrspatial.geotiff import _decode as _decode_mod from xrspatial.geotiff._reader import read_to_array @@ -64,7 +65,10 @@ def test_parallel_decode_matches_serial(self, tmp_path): with open(p, "wb") as f: f.write(blob) par, _ = read_to_array(p) - with patch.object(_reader_mod, + # Patch the threshold in ``_decode`` (where ``_read_strips`` lives + # after PR-G, issue #2246), not in ``_reader``: the back-imported + # binding in ``_reader`` is a separate reference. + with patch.object(_decode_mod, "_PARALLEL_DECODE_PIXEL_THRESHOLD", 10**12): ser, _ = read_to_array(p) np.testing.assert_array_equal(par, ser) @@ -114,7 +118,7 @@ def test_windowed_strip_read_parallel(self, tmp_path): with open(p, "wb") as f: f.write(blob) par, _ = read_to_array(p, window=(100, 100, 1500, 1500)) - with patch.object(_reader_mod, + with patch.object(_decode_mod, "_PARALLEL_DECODE_PIXEL_THRESHOLD", 10**12): ser, _ = read_to_array(p, window=(100, 100, 1500, 1500)) np.testing.assert_array_equal(par, ser) @@ -283,7 +287,7 @@ def test_parallel_matches_serial_planar2(self, tmp_path): par, _ = read_to_array(p) with patch.object( - _reader_mod, "_PARALLEL_DECODE_PIXEL_THRESHOLD", + _decode_mod, "_PARALLEL_DECODE_PIXEL_THRESHOLD", 10 ** 12): ser, _ = read_to_array(p) @@ -311,7 +315,7 @@ def test_windowed_planar2_parallel(self, tmp_path): par, _ = read_to_array(p, window=(100, 100, 900, 900)) with patch.object( - _reader_mod, "_PARALLEL_DECODE_PIXEL_THRESHOLD", + _decode_mod, "_PARALLEL_DECODE_PIXEL_THRESHOLD", 10 ** 12): ser, _ = read_to_array(p, window=(100, 100, 900, 900)) diff --git a/xrspatial/geotiff/tests/test_parallel_strip_decode_sparse_2100.py b/xrspatial/geotiff/tests/test_parallel_strip_decode_sparse_2100.py index 363be0892..e9b136b2e 100644 --- a/xrspatial/geotiff/tests/test_parallel_strip_decode_sparse_2100.py +++ b/xrspatial/geotiff/tests/test_parallel_strip_decode_sparse_2100.py @@ -53,6 +53,7 @@ from xrspatial.geotiff._reader import read_to_array # noqa: E402 from xrspatial.geotiff import _reader as _reader_mod # noqa: E402 +from xrspatial.geotiff import _decode as _decode_mod # noqa: E402 # Local-strip helpers ------------------------------------------------------- @@ -164,8 +165,12 @@ def test_full_image_parallel_matches_serial(self, tmp_path): _write_sparse_stripped_large(path) par, _ = read_to_array(path) + # Patch the threshold in ``_decode`` (PR-G #2246 home of + # ``_read_strips``), not in ``_reader``: the back-imported name in + # ``_reader`` is a separate reference and patching it would leave + # the live binding in ``_decode`` unchanged. with patch.object( - _reader_mod, + _decode_mod, "_PARALLEL_DECODE_PIXEL_THRESHOLD", 10 ** 12): ser, _ = read_to_array(path) @@ -213,7 +218,7 @@ def test_windowed_across_sparse_boundary(self, tmp_path): win = (128, 0, 384, 1024) # row range [128, 384), col range [0, 1024) par, _ = read_to_array(path, window=win) with patch.object( - _reader_mod, + _decode_mod, "_PARALLEL_DECODE_PIXEL_THRESHOLD", 10 ** 12): ser, _ = read_to_array(path, window=win) @@ -272,7 +277,7 @@ def test_planar2_sparse_parallel_matches_serial(self, tmp_path): par, _ = read_to_array(path) with patch.object( - _reader_mod, + _decode_mod, "_PARALLEL_DECODE_PIXEL_THRESHOLD", 10 ** 12): ser, _ = read_to_array(path) From 20bdb28bea15f9a42652ff5a2ad827d3fa214da4 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 21 May 2026 08:46:18 -0700 Subject: [PATCH 3/3] test(http_cog_coalesce): assert on RTTs saved instead of 2x ratio The previous assertion required coalesced wall time to be less than half the baseline, which couples the test to per-tile decode cost on the runner. On macOS arm64 in CI the constant decode overhead is large enough that the ratio fails even when coalescing correctly saves the expected ~7 RTTs of network time. Assert on absolute RTTs saved instead, which is what coalescing actually controls. --- xrspatial/geotiff/tests/test_http_cog_coalesce.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/xrspatial/geotiff/tests/test_http_cog_coalesce.py b/xrspatial/geotiff/tests/test_http_cog_coalesce.py index 3b512a6dd..77fa95534 100644 --- a/xrspatial/geotiff/tests/test_http_cog_coalesce.py +++ b/xrspatial/geotiff/tests/test_http_cog_coalesce.py @@ -213,12 +213,15 @@ def test_read_cog_http_perf_with_mock_rtt(small_cog_bytes, monkeypatch): coalesced = time.perf_counter() - t0 np.testing.assert_array_equal(arr2, expected) - # Coalesced has to be at least 2x faster on a 50 ms RTT 64-tile read. - # Baseline = ceil(64/8) * 50 ms = 400 ms; coalesced = ~50 ms for the - # single merged GET plus ~50 ms for the IFD read = ~100 ms. - assert coalesced * 2 < baseline, ( - f'coalesced wall time {coalesced:.3f}s should be much less than ' - f'baseline {baseline:.3f}s' + # Assert on RTTs saved, not on a wall-time ratio. The baseline pays + # ceil(64/8) = 8 RTTs; the coalesced path pays 1 merged GET plus the + # IFD read = ~2 RTTs. The other ~6 RTTs of saved wall time are what + # the assertion checks. A ratio assertion would couple this to per-tile + # decode cost, which varies a lot across CI runners. + rtts_saved = (baseline - coalesced) / rtt + assert rtts_saved >= 5, ( + f'coalesced wall time {coalesced:.3f}s should save at least 5 RTTs ' + f'vs baseline {baseline:.3f}s (saved {rtts_saved:.1f} RTTs of {rtt:.3f}s)' )