From dea069f20b292885bb81b0874bd1bb1ba33a5e20 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 21 May 2026 13:53:09 -0700 Subject: [PATCH 1/3] geotiff: document allow_rotated and shared kwargs on all read entry points (#2274) The four public read entry points (open_geotiff, read_geotiff_dask, read_geotiff_gpu, read_vrt) accepted allow_rotated and allow_unparseable_crs but only open_geotiff mentioned them (and allow_unparseable_crs only in the Tier prose paragraph, not the Parameters section). The three direct backends also accepted several gated kwargs (missing_sources, band_nodata, on_gpu_failure, max_cloud_bytes, overview_level on VRT) that raise ValueError on the wrong backend for error symmetry; those had no Parameters entry on the backends where they raise. Add Parameters-section entries for every signature kwarg on every read entry point. Functional kwargs get full descriptions; gated kwargs get a short pointer at the owning backend so the long form lives in one place. Add test_read_entry_points_doc_param_parity_2274.py to pin the fix and catch any future kwarg added without a matching Parameters entry on any of the four readers. Scope is documentation-only. No signature, default, or dispatch logic changes. Verified via inspect.signature vs inspect.getdoc scan, plus a CUDA smoke test of read_geotiff_gpu(allow_rotated=True). --- xrspatial/geotiff/__init__.py | 9 ++ xrspatial/geotiff/_backends/dask.py | 28 ++++ xrspatial/geotiff/_backends/gpu.py | 26 ++++ xrspatial/geotiff/_backends/vrt.py | 27 ++++ ...read_entry_points_doc_param_parity_2274.py | 121 ++++++++++++++++++ 5 files changed, 211 insertions(+) create mode 100644 xrspatial/geotiff/tests/test_read_entry_points_doc_param_parity_2274.py diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index c8e17d7da..cc9a7b320 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -489,6 +489,15 @@ def open_geotiff(source: str | BinaryIO, *, ``to_geotiff`` / ``write_geotiff_gpu`` to accept the loss; the ``ModelTransformationTag`` emit path is tracked separately (issue #2115). + allow_unparseable_crs : bool, default False + Read-side opt-in for CRS strings that pyproj cannot resolve and + that do not parse as WKT. When ``False`` (the default since + #1929), an unrecognised CRS payload raises + ``UnparseableCRSError`` instead of landing in ``attrs['crs_wkt']`` + verbatim. Set to ``True`` to keep the pre-#1929 permissive + behaviour where the citation field passes through unchanged. + Matches the same kwarg on ``to_geotiff`` / ``write_geotiff_gpu`` + so a value the reader accepted can survive a round-trip. Returns ------- diff --git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index 836fe8d59..bcfd67a62 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -100,6 +100,34 @@ def read_geotiff_dask(source: str, *, Pass ``mask_nodata=False`` together with ``dtype=`` to keep an integer source dtype; the default promotes to ``float64`` and the cast then raises. See issue #2052. + allow_rotated : bool, default False + Read-side opt-in for rotated / sheared ``ModelTransformationTag`` + files. Forwarded to every per-chunk read so a rotated source + yields an ungeoreferenced pixel grid instead of raising + ``NotImplementedError``. See ``open_geotiff`` for the full + contract; the dask path honours the same attrs (``crs`` / + ``crs_wkt`` dropped, ``rotated_affine`` set). + allow_unparseable_crs : bool, default False + Read-side opt-in for CRS strings that pyproj cannot resolve and + do not parse as WKT. When ``False`` (the default since #1929) + the chunk task raises ``UnparseableCRSError`` instead of + carrying the unrecognised payload through ``attrs['crs_wkt']``. + See ``open_geotiff`` for the full description. + on_gpu_failure : str, optional + Accepted for cross-backend signature symmetry only. The dask + path runs CPU decoders, so passing this kwarg raises + ``ValueError`` at dispatch. See ``read_geotiff_gpu`` for the + kwarg's meaning on the GPU reader. + missing_sources : {'raise', 'warn'}, optional + VRT-only. Forwarded to ``read_vrt`` when the source ends in + ``.vrt``; otherwise raises ``ValueError`` at dispatch. See + ``read_vrt`` for the full description. + max_cloud_bytes : int or None, optional + Accepted for cross-backend signature symmetry only. The dask + reader uses bounded range GETs and does not consume the + cloud-byte budget, so passing this kwarg raises ``ValueError`` + at dispatch. See ``open_geotiff`` for the eager-path + description (issue #1974). Returns ------- diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index 9ec4ab83c..f34c28001 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -198,6 +198,32 @@ def read_geotiff_gpu(source: str, *, carries the sentinel either way. Pass ``mask_nodata=False`` together with ``dtype=`` to preserve an integer source dtype on a file with a matching sentinel. See issue #2052. + allow_rotated : bool, default False + Read-side opt-in for rotated / sheared ``ModelTransformationTag`` + files. Forwarded through both GPU decode stages and the CPU + fallback so the rotated branch behaves the same regardless of + which stage produces the bytes. See ``open_geotiff`` for the + full contract; on the GPU path the result still lands as a + CuPy-backed DataArray. + allow_unparseable_crs : bool, default False + Read-side opt-in for CRS strings that pyproj cannot resolve and + do not parse as WKT. ``False`` (the default since #1929) raises + ``UnparseableCRSError``; ``True`` keeps the pre-#1929 permissive + behaviour. See ``open_geotiff`` for the full description. + band_nodata : {'first', None}, optional + VRT-only. Accepted at the signature level for parity with + ``open_geotiff``; passing it to ``read_geotiff_gpu`` raises + ``ValueError`` because GPU reads do not go through the VRT + pipeline. See ``read_vrt`` for the kwarg's meaning. + missing_sources : {'raise', 'warn'}, optional + VRT-only. Same shape as ``band_nodata`` above: accepted for + signature parity, rejected at dispatch with ``ValueError`` for + non-VRT sources. See ``read_vrt`` for the full description. + max_cloud_bytes : int or None, optional + Accepted for cross-backend signature symmetry only. The GPU + reader does not consume the cloud-byte budget; passing this + kwarg raises ``ValueError`` at dispatch (issue #1974). See + ``open_geotiff`` for the eager-path description. Returns ------- diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index 680e1548b..5b233c5d0 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -214,6 +214,33 @@ def read_vrt(source: str, *, actual pixels. See issue #2052. Float source bands are NaN-aware by virtue of how the internal reader handles them, so this kwarg is most useful for integer-dtype mosaics. + allow_rotated : bool, default False + Read-side opt-in for rotated / sheared ``ModelTransformationTag`` + files referenced by the VRT. Forwarded to the per-source reader + for each ````. See ``open_geotiff`` for the full + contract. + allow_unparseable_crs : bool, default False + Read-side opt-in for CRS strings that pyproj cannot resolve and + do not parse as WKT. ``False`` (the default since #1929) raises + ``UnparseableCRSError`` rather than carrying the unrecognised + payload through. See ``open_geotiff`` for the full description. + overview_level : int or None + Not supported for VRT sources. The VRT XML references its own + source files, so overview selection would need to apply to each + of them. Accepted at the signature level for cross-backend + symmetry; any value other than ``None`` or ``0`` raises + ``ValueError`` (issue #1685). + on_gpu_failure : str, optional + Accepted for cross-backend signature symmetry only. VRT reads + do not go through the GPU decoder pipeline, so passing this + kwarg raises ``ValueError`` at dispatch. See + ``read_geotiff_gpu`` for the kwarg's meaning on the GPU + reader. + max_cloud_bytes : int or None, optional + Accepted for cross-backend signature symmetry only. The VRT + reader does not consume the cloud-byte budget; passing this + kwarg raises ``ValueError`` at dispatch (issue #1974). See + ``open_geotiff`` for the eager-path description. Returns ------- diff --git a/xrspatial/geotiff/tests/test_read_entry_points_doc_param_parity_2274.py b/xrspatial/geotiff/tests/test_read_entry_points_doc_param_parity_2274.py new file mode 100644 index 000000000..a77f9757f --- /dev/null +++ b/xrspatial/geotiff/tests/test_read_entry_points_doc_param_parity_2274.py @@ -0,0 +1,121 @@ +"""Regression test for #2274: every kwarg on the public read entry +points has a Parameters-section docstring entry. + +The original gap: the four read entry points (``open_geotiff``, +``read_geotiff_dask``, ``read_geotiff_gpu``, ``read_vrt``) accept +``allow_rotated`` and ``allow_unparseable_crs``, but those kwargs were +only documented on ``open_geotiff`` (and only inline in the Tier prose +paragraph for ``allow_unparseable_crs``). The three direct backends +also accept several gated kwargs whose only purpose is to raise +``ValueError`` on the wrong backend so all four readers stay +error-symmetric; those kwargs had no Parameters entry on the backends +that reject them. + +This test pins the fix and catches any future addition of a signature +kwarg without a matching Parameters entry on any of the four read +entry points. +""" +from __future__ import annotations + +import inspect +import re + +import pytest + +from xrspatial.geotiff import ( + open_geotiff, + read_geotiff_dask, + read_geotiff_gpu, + read_vrt, +) + + +READ_ENTRY_POINTS = ( + open_geotiff, + read_geotiff_dask, + read_geotiff_gpu, + read_vrt, +) + + +# Numpy-style docstring parameter heading pattern. Matches lines like +# `` name : type`` after ``inspect.getdoc`` has normalised the +# leading indentation to column zero. +_PARAM_HEADING = re.compile(r"^(\w+) : ", flags=re.MULTILINE) + + +def _signature_params(fn): + return set(inspect.signature(fn).parameters) + + +def _documented_params(fn): + doc = inspect.getdoc(fn) or "" + return set(_PARAM_HEADING.findall(doc)) + + +@pytest.mark.parametrize("fn", READ_ENTRY_POINTS, ids=lambda f: f.__name__) +def test_read_entry_point_kwargs_have_docstring_entries(fn): + """Every signature kwarg appears in the Parameters section.""" + params = _signature_params(fn) + documented = _documented_params(fn) + missing = sorted(params - documented) + assert missing == [], ( + f"{fn.__name__} has kwargs without Parameters-section entries: " + f"{missing}. Add a numpy-style ``name : type`` heading for each " + f"so the docstring agrees with the signature. The kwargs may be " + f"gated (raise ValueError on the wrong backend) but they are " + f"still on the public surface, and tools that read the " + f"docstring (Sphinx, IDE help) cannot tell the kwarg exists " + f"without an entry. See #2274." + ) + + +@pytest.mark.parametrize("fn", READ_ENTRY_POINTS, ids=lambda f: f.__name__) +def test_read_entry_point_docstring_does_not_invent_params(fn): + """Every Parameters entry maps to a real signature kwarg. + + Catches the inverse drift: a kwarg removed from the signature but + still listed in the Parameters section. + """ + params = _signature_params(fn) + documented = _documented_params(fn) + extra = sorted(documented - params) + assert extra == [], ( + f"{fn.__name__} has Parameters-section entries that do not " + f"appear in the signature: {extra}. Either remove the entry " + f"or restore the kwarg." + ) + + +@pytest.mark.parametrize("fn", READ_ENTRY_POINTS, ids=lambda f: f.__name__) +def test_allow_rotated_documented(fn): + """``allow_rotated`` was the load-bearing #2274 gap on the backends. + + Pin it explicitly so a future commit that strips the Parameters + entry while keeping the signature kwarg fails loudly. + """ + assert "allow_rotated" in _signature_params(fn), ( + f"{fn.__name__} unexpectedly dropped allow_rotated from its " + f"signature" + ) + assert "allow_rotated" in _documented_params(fn), ( + f"{fn.__name__} accepts allow_rotated but does not document it " + f"in its Parameters section (#2274)." + ) + + +@pytest.mark.parametrize("fn", READ_ENTRY_POINTS, ids=lambda f: f.__name__) +def test_allow_unparseable_crs_documented(fn): + """``allow_unparseable_crs`` was the other shared #2274 gap. + + ``open_geotiff`` had the kwarg only in the Tier prose paragraph; + the three backends did not mention it at all. + """ + assert "allow_unparseable_crs" in _signature_params(fn), ( + f"{fn.__name__} unexpectedly dropped allow_unparseable_crs from " + f"its signature" + ) + assert "allow_unparseable_crs" in _documented_params(fn), ( + f"{fn.__name__} accepts allow_unparseable_crs but does not " + f"document it in its Parameters section (#2274)." + ) From 955b29d3634b7d341efb479309045bd14df957e5 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 21 May 2026 13:55:24 -0700 Subject: [PATCH 2/3] Address review nit: clarify band_nodata rejection rationale (#2274) --- xrspatial/geotiff/_backends/gpu.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index f34c28001..7655646ef 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -213,8 +213,9 @@ def read_geotiff_gpu(source: str, *, band_nodata : {'first', None}, optional VRT-only. Accepted at the signature level for parity with ``open_geotiff``; passing it to ``read_geotiff_gpu`` raises - ``ValueError`` because GPU reads do not go through the VRT - pipeline. See ``read_vrt`` for the kwarg's meaning. + ``ValueError`` because the GPU dispatcher rejects ``.vrt`` + sources up front and the kwarg only applies to VRT. See + ``read_vrt`` for the kwarg's meaning. missing_sources : {'raise', 'warn'}, optional VRT-only. Same shape as ``band_nodata`` above: accepted for signature parity, rejected at dispatch with ``ValueError`` for From fb6690751109bd4cafc5589b60ef256a3439a1f2 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 21 May 2026 16:58:46 -0700 Subject: [PATCH 3/3] Retrigger RTD build (previous run hit 902s timeout cap)