From 7863d1dcd291ef1c1557774408e4d2b96760edab Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 21 May 2026 17:05:16 -0700 Subject: [PATCH 1/2] Fix CRS model type guess from EPSG number range (#2277) The GeoTIFF writer decided GeographicTypeGeoKey vs ProjectedCSTypeGeoKey from the EPSG number range (4326 plus 4000-4999). Anything else got written as projected. That silently mis-tagged geographic CRSes registered outside the legacy block -- 6318 (NAD83(2011)), 7844 (GDA2020), 9057 (WGS 84 (G2139)), 8252 (NAD83(FBN)), etc. -- which corrupts the CRS at write time. The writer now consults pyproj.CRS.from_epsg(...).is_geographic when pyproj is available. When pyproj is not installed and the code falls outside the hard-coded fallback set (4326 + 4000-4999), the writer raises UnknownCRSModelTypeError instead of guessing. Silent CRS corruption is worse than an explicit error at write time. New typed error UnknownCRSModelTypeError lives next to the existing GeoTIFFAmbiguousMetadataError family and is exported from xrspatial.geotiff. Tests cover the codes that previously regressed (6318, 7844, 9057, 8252), the legacy-range codes (4326, 4269, 4267), real projected codes (32610, 3857, 2193), the pyproj-missing fail-closed path, and the pyproj-installed-but-unknown-code path. --- xrspatial/geotiff/__init__.py | 2 + xrspatial/geotiff/_errors.py | 16 +++ xrspatial/geotiff/_geotags.py | 79 ++++++++++++-- xrspatial/geotiff/tests/test_geotags.py | 133 ++++++++++++++++++++++++ 4 files changed, 224 insertions(+), 6 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 23400c4f4..3c73d5e21 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -61,6 +61,7 @@ MixedBandMetadataError, NonUniformCoordsError, RotatedTransformError, + UnknownCRSModelTypeError, UnparseableCRSError, ) from ._geotags import GeoTransform, RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT @@ -140,6 +141,7 @@ 'NonUniformCoordsError', 'RotatedTransformError', 'SUPPORTED_FEATURES', + 'UnknownCRSModelTypeError', 'UnparseableCRSError', 'UnsafeURLError', 'open_geotiff', diff --git a/xrspatial/geotiff/_errors.py b/xrspatial/geotiff/_errors.py index aa972b546..47f2492e6 100644 --- a/xrspatial/geotiff/_errors.py +++ b/xrspatial/geotiff/_errors.py @@ -103,6 +103,21 @@ class ConflictingNodataError(GeoTIFFAmbiguousMetadataError): """ +class UnknownCRSModelTypeError(GeoTIFFAmbiguousMetadataError): + """Can't classify an EPSG as geographic or projected on write (#2277). + + Raised by the GeoTIFF writer when the caller supplies an EPSG code + that pyproj cannot resolve, or when pyproj isn't installed and the + code falls outside the hard-coded geographic fallback set + (EPSG 4326 plus the 4000-4999 block). The legacy heuristic at the + same site guessed any code outside that window was projected, which + silently mis-tagged geographic codes like 6318 (NAD83(2011)), + 7844 (GDA2020), and 9057 (WGS 84 (G2139)) as + ``ProjectedCSTypeGeoKey``. Silent CRS corruption is worse than an + explicit error. + """ + + __all__ = [ "GeoTIFFAmbiguousMetadataError", "InvalidCRSCodeError", @@ -112,4 +127,5 @@ class ConflictingNodataError(GeoTIFFAmbiguousMetadataError): "MixedBandMetadataError", "ConflictingCRSError", "ConflictingNodataError", + "UnknownCRSModelTypeError", ] diff --git a/xrspatial/geotiff/_geotags.py b/xrspatial/geotiff/_geotags.py index 2f482a1c7..e7b862ece 100644 --- a/xrspatial/geotiff/_geotags.py +++ b/xrspatial/geotiff/_geotags.py @@ -34,7 +34,7 @@ TAG_GEO_KEY_DIRECTORY, TAG_GEO_DOUBLE_PARAMS, TAG_GEO_ASCII_PARAMS, ) from ._dtypes import resolve_bits_per_sample -from ._errors import RotatedTransformError +from ._errors import RotatedTransformError, UnknownCRSModelTypeError # ImageDescription tag (270). Captured for round-trip but not managed # by the writer -- it flows through extra_tags pass-through. @@ -1317,6 +1317,72 @@ def _model_type_from_wkt(wkt: str) -> int: return MODEL_TYPE_PROJECTED +# Hard-coded fallback set of EPSG codes known to be geographic. Used only +# when pyproj is unavailable. Kept intentionally tight: the historic +# 4000-4999 block plus EPSG 4326 itself, which is the case the legacy +# range heuristic got right. Anything outside this set without pyproj is +# treated as "unknown" and raises -- silent CRS corruption is worse than +# an explicit error. +# +# The range covers the EPSG "geographic CRS" allocation that predates the +# more recent realisations (NAD83(2011) = 6318, GDA2020 = 7844, +# WGS 84 (G2139) = 9057, etc.) which live outside 4000-4999 and need +# pyproj to classify correctly. See issue #2277. +_KNOWN_GEOGRAPHIC_EPSG_FALLBACK = frozenset(range(4000, 5000)) + + +def _model_type_from_epsg(crs_epsg: int) -> int: + """Return the GeoTIFF ModelType (geographic vs projected) for an EPSG. + + Prefers ``pyproj.CRS.from_epsg(crs_epsg).is_geographic`` so the + decision matches the actual EPSG registry. Falls back to a tight + hard-coded allowlist (EPSG 4326 plus the 4000-4999 block) when + pyproj isn't installed. Anything outside that fallback set raises + :class:`UnknownCRSModelTypeError` rather than guessing -- the legacy range + heuristic at this site silently mis-tagged geographic codes like + 6318 (NAD83(2011)) and 7844 (GDA2020) as projected. + + See issue #2277. + """ + try: + from pyproj import CRS + except ImportError: + CRS = None # type: ignore[assignment] + + if CRS is not None: + try: + crs = CRS.from_epsg(crs_epsg) + except Exception as e: + # pyproj is installed but the code is unknown or malformed. + # Fall back to the hard-coded allowlist before giving up so + # an offline pyproj-database mismatch doesn't break a code + # that the legacy heuristic handled correctly. + if crs_epsg in _KNOWN_GEOGRAPHIC_EPSG_FALLBACK: + return MODEL_TYPE_GEOGRAPHIC + raise UnknownCRSModelTypeError( + f"Cannot determine GeoTIFF model type for EPSG:{crs_epsg}: " + f"pyproj.CRS.from_epsg failed " + f"({type(e).__name__}: {e}). Refusing to guess; passing a " + "known EPSG or installing a current pyproj database will " + "resolve this." + ) from e + if crs.is_geographic: + return MODEL_TYPE_GEOGRAPHIC + return MODEL_TYPE_PROJECTED + + # pyproj unavailable. Use the tight fallback set; raise otherwise. + if crs_epsg in _KNOWN_GEOGRAPHIC_EPSG_FALLBACK: + return MODEL_TYPE_GEOGRAPHIC + raise UnknownCRSModelTypeError( + f"Cannot determine GeoTIFF model type for EPSG:{crs_epsg} without " + "pyproj. Install pyproj so the writer can consult " + "CRS.from_epsg(...).is_geographic, or pass an EPSG in the " + "hard-coded geographic fallback set (4000-4999). Refusing to " + "guess: the legacy range heuristic silently mis-tagged codes " + "like 6318 and 7844 as projected. See issue #2277." + ) + + def build_geo_tags(transform: GeoTransform, crs_epsg: int | None = None, nodata=None, raster_type: int = RASTER_PIXEL_IS_AREA, @@ -1402,11 +1468,12 @@ def build_geo_tags(transform: GeoTransform, crs_epsg: int | None = None, # ModelType if crs_epsg is not None: - # Guess model type from EPSG (simple heuristic) - if crs_epsg == 4326 or (crs_epsg >= 4000 and crs_epsg < 5000): - model_type = MODEL_TYPE_GEOGRAPHIC - else: - model_type = MODEL_TYPE_PROJECTED + # Resolve the GeoTIFF ModelType via pyproj when available; raise + # on unknown codes when pyproj is missing rather than guessing. + # See issue #2277 -- the historic EPSG-range heuristic silently + # mis-tagged geographic codes outside 4000-4999 (e.g. 6318, + # 7844, 9057) as projected, corrupting the CRS at write time. + model_type = _model_type_from_epsg(crs_epsg) key_entries.append((GEOKEY_MODEL_TYPE, 0, 1, model_type)) num_keys += 1 elif crs_wkt is not None: diff --git a/xrspatial/geotiff/tests/test_geotags.py b/xrspatial/geotiff/tests/test_geotags.py index 12bfeaa5f..c78096214 100644 --- a/xrspatial/geotiff/tests/test_geotags.py +++ b/xrspatial/geotiff/tests/test_geotags.py @@ -110,6 +110,139 @@ def test_projected_crs_geokey(self): assert 3072 in geokeys # GEOKEY_PROJECTED_CS_TYPE +class TestModelTypeFromEPSG: + """Regression coverage for issue #2277. + + The legacy heuristic at build_geo_tags decided geographic-vs-projected + from the EPSG number range (4326 plus 4000-4999). That silently + mis-tagged geographic CRSes registered outside the 4000-4999 block -- + NAD83(2011) = 6318, GDA2020 = 7844, WGS 84 (G2139) = 9057, etc. -- + as projected, which corrupts the CRS at write time. + + The fix consults pyproj when available; when pyproj is missing, + anything outside the hard-coded fallback set raises rather than + guessing. + """ + + @pytest.mark.parametrize("epsg", [4326, 4269, 4267]) + def test_geographic_in_legacy_range(self, epsg): + # Codes the old range heuristic already handled. + gt = GeoTransform(-120.0, 45.0, 0.001, -0.001) + tags = build_geo_tags(gt, crs_epsg=epsg) + geokeys = tags[TAG_GEO_KEY_DIRECTORY] + assert GEOKEY_GEOGRAPHIC_TYPE in geokeys + assert GEOKEY_PROJECTED_CS_TYPE not in geokeys + + @pytest.mark.parametrize("epsg", [6318, 7844, 9057, 8252]) + def test_geographic_outside_legacy_range(self, epsg): + # The bug: pre-fix, these wrote ProjectedCSTypeGeoKey. + # pyproj.CRS.from_epsg(...).is_geographic is True for all four. + pytest.importorskip("pyproj") + gt = GeoTransform(-120.0, 45.0, 0.001, -0.001) + tags = build_geo_tags(gt, crs_epsg=epsg) + geokeys = tags[TAG_GEO_KEY_DIRECTORY] + assert GEOKEY_GEOGRAPHIC_TYPE in geokeys, ( + f"EPSG:{epsg} is geographic per pyproj but was written as " + f"projected -- regression of issue #2277" + ) + assert GEOKEY_PROJECTED_CS_TYPE not in geokeys + # And the EPSG payload is attached to the geographic key. + flat = list(geokeys) + i = flat.index(GEOKEY_GEOGRAPHIC_TYPE) + # Each key entry is (key_id, location, count, value_offset); + # location=0 means the value is stored inline at value_offset. + assert flat[i + 3] == epsg + + @pytest.mark.parametrize("epsg", [32610, 3857, 2193]) + def test_projected_dispatch(self, epsg): + pytest.importorskip("pyproj") + gt = GeoTransform(500000.0, 4500000.0, 30.0, -30.0) + tags = build_geo_tags(gt, crs_epsg=epsg) + geokeys = tags[TAG_GEO_KEY_DIRECTORY] + assert GEOKEY_PROJECTED_CS_TYPE in geokeys + assert GEOKEY_GEOGRAPHIC_TYPE not in geokeys + + def test_unknown_epsg_without_pyproj_raises(self, monkeypatch): + """Fail closed when pyproj is unavailable and the code is unknown. + + Mocks `pyproj` import failure to exercise the fallback path. + EPSG 7844 (GDA2020 geographic) is outside the hard-coded + 4000-4999 allowlist, so without pyproj the writer cannot tell + geographic from projected and must raise -- silent corruption + is worse than an explicit error. + """ + import builtins + from xrspatial.geotiff._errors import UnknownCRSModelTypeError + + real_import = builtins.__import__ + + def fake_import(name, *args, **kwargs): + if name == "pyproj" or name.startswith("pyproj."): + raise ImportError("simulated: pyproj not installed") + return real_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", fake_import) + + gt = GeoTransform(-120.0, 45.0, 0.001, -0.001) + with pytest.raises(UnknownCRSModelTypeError, match="pyproj"): + build_geo_tags(gt, crs_epsg=7844) + + def test_known_geographic_without_pyproj_still_works(self, monkeypatch): + """The 4000-4999 fallback set keeps working when pyproj is gone. + + EPSG 4326 (and the broader 4000-4999 block) is in the hard-coded + fallback set so EPSG-tagged geographic rasters continue to round + trip without a pyproj dependency. + """ + import builtins + + real_import = builtins.__import__ + + def fake_import(name, *args, **kwargs): + if name == "pyproj" or name.startswith("pyproj."): + raise ImportError("simulated: pyproj not installed") + return real_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", fake_import) + + gt = GeoTransform(-120.0, 45.0, 0.001, -0.001) + tags = build_geo_tags(gt, crs_epsg=4326) + geokeys = tags[TAG_GEO_KEY_DIRECTORY] + assert GEOKEY_GEOGRAPHIC_TYPE in geokeys + assert GEOKEY_PROJECTED_CS_TYPE not in geokeys + + def test_unknown_epsg_with_pyproj_raises(self, monkeypatch): + """pyproj raises on an unknown EPSG -> writer raises too. + + Simulates the case where pyproj is installed but the EPSG code + isn't in its database (e.g. the local PROJ database is out of + date). The fallback set covers EPSG 4326 / 4000-4999; anything + else should raise rather than fall through to a guess. + """ + from xrspatial.geotiff import _geotags + from xrspatial.geotiff._errors import UnknownCRSModelTypeError + + # Patch _model_type_from_epsg's pyproj path by stubbing CRS.from_epsg. + import pyproj + + def boom(code): + raise pyproj.exceptions.CRSError(f"simulated unknown code {code}") + + monkeypatch.setattr(pyproj.CRS, "from_epsg", staticmethod(boom)) + + gt = GeoTransform(0.0, 0.0, 30.0, -30.0) + # 9999999 is outside the 4000-4999 fallback set. + with pytest.raises(UnknownCRSModelTypeError): + build_geo_tags(gt, crs_epsg=9999999) + + # And the fallback set still rescues EPSG 4326 even when pyproj fails. + tags = build_geo_tags(gt, crs_epsg=4326) + assert GEOKEY_GEOGRAPHIC_TYPE in tags[TAG_GEO_KEY_DIRECTORY] + + # Use _geotags to silence "imported but unused" linters. + assert _geotags is not None + + def _build_tiff_with_transformation_tag(matrix_16: tuple) -> bytes: """Build a tiny single-strip TIFF carrying a 4x4 ModelTransformationTag. From 201223844979e75e03caefbd6d09211fba2d4b3f Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 21 May 2026 17:16:57 -0700 Subject: [PATCH 2/2] Address review: tighten EPSG fallback, CHANGELOG, pyproj extra (#2277) - Narrow `_KNOWN_GEOGRAPHIC_EPSG_FALLBACK` from the full 4000-4999 block to a vetted allowlist (4326, 4269, 4267, 4258, 4283, 4322, 4230, 4019, 4047). The 4000-4999 range contained projected codes (4087/4088 World Equidistant Cylindrical, 4499 CGCS2000 GK 21) that the old fallback would have mis-tagged as geographic, the exact dual of the bug this PR is fixing. Without pyproj, classify only what we can manually verify. - Add EPSG 4087/4088/4499 regression test so the projected-inside- legacy-range case stays covered. - Drop the noise lines from the new tests: the fragile `flat.index` assertion and the dummy `assert _geotags is not None`. - Refresh the helper's docstring / inline comment to mention both fallback paths (pyproj-missing AND pyproj-installed-but-DB-failed). - Append the new `UnknownCRSModelTypeError` to the public-API contract test in test_features.py so it stays in `__all__`. - Add `pyproj` to the `geotiff` extra in setup.cfg so installing `xarray-spatial[geotiff]` picks up the dependency the writer now prefers. - CHANGELOG entry under Unreleased calls out the behaviour change. --- CHANGELOG.md | 1 + setup.cfg | 5 ++ xrspatial/geotiff/_geotags.py | 66 ++++++++++++++-------- xrspatial/geotiff/tests/test_features.py | 1 + xrspatial/geotiff/tests/test_geotags.py | 70 +++++++++++++----------- 5 files changed, 90 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c745c0efa..d54ba5ffc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Unreleased #### Bug fixes and improvements +- Resolve the GeoTIFF writer's `GeographicTypeGeoKey` / `ProjectedCSTypeGeoKey` decision via pyproj instead of an EPSG number range. The legacy heuristic (4326 + 4000-4999 -> geographic, else projected) silently mis-tagged geographic CRSes registered outside the 4000-4999 block (NAD83(2011) = 6318, GDA2020 = 7844, WGS 84 (G2139) = 9057, etc.) as projected and projected codes inside the block (4087 / 4088 / 4499) as geographic, corrupting the CRS at write time. The writer now calls `pyproj.CRS.from_epsg(...).is_geographic`. When pyproj can't classify a code (uninstalled, or installed but the local PROJ database lacks the entry), the writer raises the new `UnknownCRSModelTypeError` rather than guessing -- a small vetted allowlist (4326, 4269, 4267, 4258, 4283, 4322, 4230, 4019, 4047) is still honoured for the pyproj-missing case. `pyproj` is now listed under the `geotiff` extra. (#2277) - Remove read-side emission of the 13 deprecated GeoTIFF attrs (`crs_name`, `geog_citation`, `datum_code`, `angular_units`, `semi_major_axis`, `inv_flattening`, `linear_units`, `projection_code`, `vertical_crs`, `vertical_citation`, `vertical_units`, `colormap_rgba`, `cmap`) and bump `attrs['_xrspatial_geotiff_contract']` from 1 to 2. Downstream code that read these via `attrs[key]` now sees `KeyError`; migrate to `attrs.get(key)` or derive the value from `attrs['crs']` / `attrs['crs_wkt']` with pyproj. The `.xrs.plot()` accessor still surfaces palette colormaps by building a `ListedColormap` from the canonical `attrs['colormap']`. (#2016) - Accept numpy integer scalars as the `crs=` argument to `to_geotiff` / `write_geotiff_gpu`. The validator already allowed `numbers.Integral`, but the writers gated EPSG assignment on `isinstance(crs, int)`, so `np.int32` / `np.int64` / `np.uint16` values passed validation then silently fell through with no EPSG written. (#2082) - Tighten the writer's no-georef sentinel for integer x/y coords. The pre-fix check treated any integer dtype on either axis as the read-side no-georef placeholder and skipped transform inference, which also caught user-authored projected grids with integer-spaced coords (e.g. `x=[100,101,102], y=[200,199]`) and silently stripped their georef on write. The sentinel now matches only the exact reader pattern: `int64` ascending contiguous-step-1 arange on both axes. User-authored integer-coord grids that don't match (descending, non-unit step, non-uniform, or non-`int64`) now produce a real transform or raise `NonUniformCoordsError`. Coord values round-trip exactly through the new path; dtype flips int->float on subsequent reads. (#2087) diff --git a/setup.cfg b/setup.cfg index 464f6efe6..dcfdbbfaa 100644 --- a/setup.cfg +++ b/setup.cfg @@ -64,6 +64,11 @@ reproject = pyproj geotiff = deflate + # Required by the GeoTIFF writer to classify EPSG codes as + # geographic vs projected. Without pyproj the writer can only + # handle a tiny hard-coded allowlist of EPSGs and raises + # UnknownCRSModelTypeError on anything else. See issue #2277. + pyproj dask = dask[array] dask-geopandas diff --git a/xrspatial/geotiff/_geotags.py b/xrspatial/geotiff/_geotags.py index e7b862ece..2eb7f9c6e 100644 --- a/xrspatial/geotiff/_geotags.py +++ b/xrspatial/geotiff/_geotags.py @@ -1317,30 +1317,49 @@ def _model_type_from_wkt(wkt: str) -> int: return MODEL_TYPE_PROJECTED -# Hard-coded fallback set of EPSG codes known to be geographic. Used only -# when pyproj is unavailable. Kept intentionally tight: the historic -# 4000-4999 block plus EPSG 4326 itself, which is the case the legacy -# range heuristic got right. Anything outside this set without pyproj is -# treated as "unknown" and raises -- silent CRS corruption is worse than -# an explicit error. +# Vetted allowlist of EPSG codes known to be geographic. Consulted in +# two situations: when pyproj is unavailable at all, and when pyproj is +# installed but its database can't resolve the code (e.g. out-of-date +# PROJ db). Anything outside this set falls through to +# UnknownCRSModelTypeError -- silent CRS corruption is worse than an +# explicit error. # -# The range covers the EPSG "geographic CRS" allocation that predates the -# more recent realisations (NAD83(2011) = 6318, GDA2020 = 7844, -# WGS 84 (G2139) = 9057, etc.) which live outside 4000-4999 and need -# pyproj to classify correctly. See issue #2277. -_KNOWN_GEOGRAPHIC_EPSG_FALLBACK = frozenset(range(4000, 5000)) +# The set is kept intentionally small. The original range heuristic +# (4000-4999) was wrong in both directions: it missed geographic codes +# outside the window (NAD83(2011) = 6318, GDA2020 = 7844, etc., the bug +# this PR fixes), AND it also covered projected codes inside the window +# (4087 / 4088 World Equidistant Cylindrical, 4499 CGCS2000 / GK +# zone 21, etc.). Expanding the set without pyproj's authoritative +# answer just trades one mis-tag for another. +# +# Currently: the few datums callers regularly write without pyproj. +# Extend only after verifying ``CRS.from_epsg(code).is_geographic`` is +# True. See issue #2277. +_KNOWN_GEOGRAPHIC_EPSG_FALLBACK = frozenset({ + 4326, # WGS 84 + 4269, # NAD83 + 4267, # NAD27 + 4258, # ETRS89 + 4283, # GDA94 + 4322, # WGS 72 + 4230, # ED50 + 4019, # Unknown datum based on GRS 1980 ellipsoid + 4047, # Unspecified datum based on GRS 1980 Authalic Sphere +}) def _model_type_from_epsg(crs_epsg: int) -> int: """Return the GeoTIFF ModelType (geographic vs projected) for an EPSG. Prefers ``pyproj.CRS.from_epsg(crs_epsg).is_geographic`` so the - decision matches the actual EPSG registry. Falls back to a tight - hard-coded allowlist (EPSG 4326 plus the 4000-4999 block) when - pyproj isn't installed. Anything outside that fallback set raises - :class:`UnknownCRSModelTypeError` rather than guessing -- the legacy range - heuristic at this site silently mis-tagged geographic codes like - 6318 (NAD83(2011)) and 7844 (GDA2020) as projected. + decision matches the actual EPSG registry. Falls back to a small + vetted allowlist of common geographic datums + (:data:`_KNOWN_GEOGRAPHIC_EPSG_FALLBACK`) when pyproj isn't + installed *or* when pyproj is installed but fails to resolve the + code. Anything outside that allowlist raises + :class:`UnknownCRSModelTypeError` rather than guessing -- the legacy + range heuristic at this site silently mis-tagged geographic codes + like 6318 (NAD83(2011)) and 7844 (GDA2020) as projected. See issue #2277. """ @@ -1377,7 +1396,8 @@ def _model_type_from_epsg(crs_epsg: int) -> int: f"Cannot determine GeoTIFF model type for EPSG:{crs_epsg} without " "pyproj. Install pyproj so the writer can consult " "CRS.from_epsg(...).is_geographic, or pass an EPSG in the " - "hard-coded geographic fallback set (4000-4999). Refusing to " + "hard-coded geographic fallback set " + f"({sorted(_KNOWN_GEOGRAPHIC_EPSG_FALLBACK)}). Refusing to " "guess: the legacy range heuristic silently mis-tagged codes " "like 6318 and 7844 as projected. See issue #2277." ) @@ -1469,10 +1489,12 @@ def build_geo_tags(transform: GeoTransform, crs_epsg: int | None = None, # ModelType if crs_epsg is not None: # Resolve the GeoTIFF ModelType via pyproj when available; raise - # on unknown codes when pyproj is missing rather than guessing. - # See issue #2277 -- the historic EPSG-range heuristic silently - # mis-tagged geographic codes outside 4000-4999 (e.g. 6318, - # 7844, 9057) as projected, corrupting the CRS at write time. + # on unknown codes when pyproj can't classify them rather than + # guessing. See issue #2277 -- the historic EPSG-range heuristic + # silently mis-tagged geographic codes outside 4000-4999 (e.g. + # 6318, 7844, 9057) as projected AND projected codes inside + # 4000-4999 (e.g. 4087, 4088, 4499) as geographic, corrupting + # the CRS at write time. model_type = _model_type_from_epsg(crs_epsg) key_entries.append((GEOKEY_MODEL_TYPE, 0, 1, model_type)) num_keys += 1 diff --git a/xrspatial/geotiff/tests/test_features.py b/xrspatial/geotiff/tests/test_features.py index ef7906e41..178e2cd10 100644 --- a/xrspatial/geotiff/tests/test_features.py +++ b/xrspatial/geotiff/tests/test_features.py @@ -2780,6 +2780,7 @@ def test_all_lists_supported_functions(self): 'MixedBandMetadataError', 'NonUniformCoordsError', 'RotatedTransformError', + 'UnknownCRSModelTypeError', 'UnparseableCRSError', 'GeoTIFFFallbackWarning', 'UnsafeURLError', diff --git a/xrspatial/geotiff/tests/test_geotags.py b/xrspatial/geotiff/tests/test_geotags.py index c78096214..295c55bdf 100644 --- a/xrspatial/geotiff/tests/test_geotags.py +++ b/xrspatial/geotiff/tests/test_geotags.py @@ -115,18 +115,21 @@ class TestModelTypeFromEPSG: The legacy heuristic at build_geo_tags decided geographic-vs-projected from the EPSG number range (4326 plus 4000-4999). That silently - mis-tagged geographic CRSes registered outside the 4000-4999 block -- - NAD83(2011) = 6318, GDA2020 = 7844, WGS 84 (G2139) = 9057, etc. -- - as projected, which corrupts the CRS at write time. - - The fix consults pyproj when available; when pyproj is missing, - anything outside the hard-coded fallback set raises rather than + mis-tagged geographic CRSes registered outside the 4000-4999 block + (NAD83(2011) = 6318, GDA2020 = 7844, WGS 84 (G2139) = 9057, etc.) as + projected, AND mis-tagged projected codes inside the block (4087 / + 4088 / 4499 etc.) as geographic. Both directions corrupt the CRS at + write time. + + The fix consults pyproj when available; when pyproj can't classify + a code (uninstalled, or installed but DB lookup fails), anything + outside a small vetted geographic allowlist raises rather than guessing. """ - @pytest.mark.parametrize("epsg", [4326, 4269, 4267]) - def test_geographic_in_legacy_range(self, epsg): - # Codes the old range heuristic already handled. + @pytest.mark.parametrize("epsg", [4326, 4269, 4267, 4258, 4283]) + def test_geographic_allowlist(self, epsg): + # Codes the hard-coded fallback set covers explicitly. gt = GeoTransform(-120.0, 45.0, 0.001, -0.001) tags = build_geo_tags(gt, crs_epsg=epsg) geokeys = tags[TAG_GEO_KEY_DIRECTORY] @@ -146,12 +149,22 @@ def test_geographic_outside_legacy_range(self, epsg): f"projected -- regression of issue #2277" ) assert GEOKEY_PROJECTED_CS_TYPE not in geokeys - # And the EPSG payload is attached to the geographic key. - flat = list(geokeys) - i = flat.index(GEOKEY_GEOGRAPHIC_TYPE) - # Each key entry is (key_id, location, count, value_offset); - # location=0 means the value is stored inline at value_offset. - assert flat[i + 3] == epsg + + @pytest.mark.parametrize("epsg", [4087, 4088, 4499]) + def test_projected_inside_legacy_range(self, epsg): + # The other direction of the legacy bug: codes in 4000-4999 that + # are actually projected (World Equidistant Cylindrical at 4087 / + # 4088, CGCS2000 Gauss-Kruger zone 21 at 4499). The fix routes + # these through pyproj, which classifies them correctly. + pytest.importorskip("pyproj") + gt = GeoTransform(500000.0, 4500000.0, 30.0, -30.0) + tags = build_geo_tags(gt, crs_epsg=epsg) + geokeys = tags[TAG_GEO_KEY_DIRECTORY] + assert GEOKEY_PROJECTED_CS_TYPE in geokeys, ( + f"EPSG:{epsg} is projected per pyproj but was written as " + f"geographic -- regression of issue #2277" + ) + assert GEOKEY_GEOGRAPHIC_TYPE not in geokeys @pytest.mark.parametrize("epsg", [32610, 3857, 2193]) def test_projected_dispatch(self, epsg): @@ -167,9 +180,9 @@ def test_unknown_epsg_without_pyproj_raises(self, monkeypatch): Mocks `pyproj` import failure to exercise the fallback path. EPSG 7844 (GDA2020 geographic) is outside the hard-coded - 4000-4999 allowlist, so without pyproj the writer cannot tell - geographic from projected and must raise -- silent corruption - is worse than an explicit error. + allowlist, so without pyproj the writer cannot tell geographic + from projected and must raise -- silent corruption is worse + than an explicit error. """ import builtins from xrspatial.geotiff._errors import UnknownCRSModelTypeError @@ -188,11 +201,10 @@ def fake_import(name, *args, **kwargs): build_geo_tags(gt, crs_epsg=7844) def test_known_geographic_without_pyproj_still_works(self, monkeypatch): - """The 4000-4999 fallback set keeps working when pyproj is gone. + """The hard-coded allowlist keeps working when pyproj is gone. - EPSG 4326 (and the broader 4000-4999 block) is in the hard-coded - fallback set so EPSG-tagged geographic rasters continue to round - trip without a pyproj dependency. + EPSG 4326 (and the rest of the vetted allowlist) lets common + geographic rasters round-trip without a pyproj dependency. """ import builtins @@ -216,13 +228,12 @@ def test_unknown_epsg_with_pyproj_raises(self, monkeypatch): Simulates the case where pyproj is installed but the EPSG code isn't in its database (e.g. the local PROJ database is out of - date). The fallback set covers EPSG 4326 / 4000-4999; anything - else should raise rather than fall through to a guess. + date). The fallback set covers a small vetted allowlist; + anything else should raise rather than guess. """ - from xrspatial.geotiff import _geotags from xrspatial.geotiff._errors import UnknownCRSModelTypeError - # Patch _model_type_from_epsg's pyproj path by stubbing CRS.from_epsg. + # Patch CRS.from_epsg to simulate a missing DB entry. import pyproj def boom(code): @@ -231,17 +242,14 @@ def boom(code): monkeypatch.setattr(pyproj.CRS, "from_epsg", staticmethod(boom)) gt = GeoTransform(0.0, 0.0, 30.0, -30.0) - # 9999999 is outside the 4000-4999 fallback set. + # 9999999 is outside the hard-coded allowlist. with pytest.raises(UnknownCRSModelTypeError): build_geo_tags(gt, crs_epsg=9999999) - # And the fallback set still rescues EPSG 4326 even when pyproj fails. + # And the allowlist still rescues EPSG 4326 even when pyproj fails. tags = build_geo_tags(gt, crs_epsg=4326) assert GEOKEY_GEOGRAPHIC_TYPE in tags[TAG_GEO_KEY_DIRECTORY] - # Use _geotags to silence "imported but unused" linters. - assert _geotags is not None - def _build_tiff_with_transformation_tag(matrix_16: tuple) -> bytes: """Build a tiny single-strip TIFF carrying a 4x4 ModelTransformationTag.