Skip to content

Commit 497d7c5

Browse files
committed
geotiff: keep no-georef int coords through to_geotiff round-trip (#1949)
``_coords_to_transform`` previously inferred a unit GeoTransform from the integer pixel coords the read-side no-georef branch emits (``np.arange(N, dtype=np.int64)``), so writing a no-georef DataArray back through ``to_geotiff`` silently injected ``ModelPixelScale`` / ``ModelTiepoint`` tags. On the next read the file looked georeferenced, the coord dtype flipped to float64, and a synthetic ``attrs['transform']`` appeared. Short-circuit when either spatial coord array has an integer dtype -- that signal only comes from the read-side no-georef fallback, so returning ``None`` here keeps the no-georef contract across an arbitrary number of read -> write -> read cycles on every writer path (eager numpy, dask streaming, GPU). An explicit ``attrs['transform']`` still wins because the writers consult it before calling this helper. Adds ``test_no_georef_writer_round_trip_1949.py`` covering the helper directly, eager / dask streaming / GPU round-trips, a double round-trip, and the explicit-attr override.
1 parent 750dc20 commit 497d7c5

2 files changed

Lines changed: 303 additions & 0 deletions

File tree

xrspatial/geotiff/_coords.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,23 @@ def coords_to_transform(da: xr.DataArray) -> 'GeoTransform | None':
277277
if len(x) < 2 or len(y) < 2:
278278
return None
279279

280+
# Integer coord dtype is the read-side no-georef signal: the
281+
# ``has_georef=False`` branch in ``coords_from_pixel_geometry``
282+
# emits ``np.arange(N, dtype=np.int64)`` for files without GeoTIFF
283+
# transform tags (#1710, #1753). Synthesising a GeoTransform from
284+
# those ``[0, 1, 2, ...]`` arrays would inject a fake unit transform
285+
# (``origin=-0.5, pixel_width=1.0``) into the written file's
286+
# ModelPixelScale / ModelTiepoint tags. The next read then takes
287+
# the georef branch and the coord dtype silently flips to
288+
# ``float64`` with ``attrs['transform']`` present, breaking the
289+
# no-georef contract that downstream code branches on. See issue
290+
# #1949. Float coord arrays still produce a GeoTransform (the
291+
# canonical georef path), and an explicit ``attrs['transform']``
292+
# bypasses this helper entirely so callers can still write a
293+
# transform alongside int coords by setting the attr.
294+
if x.dtype.kind in ('i', 'u') or y.dtype.kind in ('i', 'u'):
295+
return None
296+
280297
# GeoTIFF only supports an affine transform; non-uniform spacing
281298
# cannot be expressed faithfully. Validate up-front instead of
282299
# silently writing a transform that only matches the first step.
Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
"""Round-trip tests for issue #1949.
2+
3+
A TIFF with no GeoTIFF tags (no ``ModelPixelScale``, no
4+
``ModelTiepoint``, no GeoKeys) reads back with integer pixel coords
5+
``[0, 1, 2, ...]`` and ``int64`` dtype (the no-georef branch added in
6+
#1710 and #1753).
7+
8+
Before the fix, writing that DataArray back through ``to_geotiff``
9+
silently injected a synthetic ``ModelPixelScale`` + ``ModelTiepoint``
10+
because ``_coords_to_transform`` happily inferred a unit transform from
11+
the integer pixel coords. The next read then took the georef branch
12+
and the y/x dtype flipped from ``int64`` to ``float64`` with a
13+
``transform`` attr present.
14+
15+
The fix makes ``_coords_to_transform`` return ``None`` when either
16+
spatial coord array carries an integer dtype, so the no-georef contract
17+
survives an arbitrary number of read -> write -> read cycles across
18+
every writer path.
19+
"""
20+
from __future__ import annotations
21+
22+
import importlib.util
23+
import os
24+
import tempfile
25+
26+
import numpy as np
27+
import pytest
28+
import tifffile
29+
import xarray as xr
30+
31+
from xrspatial.geotiff import _coords_to_transform, open_geotiff, to_geotiff
32+
33+
34+
def _gpu_available() -> bool:
35+
if importlib.util.find_spec("cupy") is None:
36+
return False
37+
try:
38+
import cupy
39+
return bool(cupy.cuda.is_available())
40+
except Exception:
41+
return False
42+
43+
44+
_HAS_GPU = _gpu_available()
45+
_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")
46+
47+
48+
# ---------------------------------------------------------------------------
49+
# Unit tests on _coords_to_transform
50+
# ---------------------------------------------------------------------------
51+
52+
53+
def test_coords_to_transform_returns_none_for_int64_y_coords():
54+
"""Integer y coords ``[0, 1, 2, ...]`` are the no-georef signal the
55+
read side emits; ``_coords_to_transform`` must not fabricate a
56+
transform from them."""
57+
da = xr.DataArray(
58+
np.zeros((4, 5), dtype=np.float32),
59+
dims=['y', 'x'],
60+
coords={
61+
'y': np.arange(4, dtype=np.int64),
62+
'x': np.arange(5, dtype=np.float64),
63+
},
64+
)
65+
assert _coords_to_transform(da) is None
66+
67+
68+
def test_coords_to_transform_returns_none_for_int64_x_coords():
69+
"""Same check on the other axis."""
70+
da = xr.DataArray(
71+
np.zeros((4, 5), dtype=np.float32),
72+
dims=['y', 'x'],
73+
coords={
74+
'y': np.arange(4, dtype=np.float64),
75+
'x': np.arange(5, dtype=np.int64),
76+
},
77+
)
78+
assert _coords_to_transform(da) is None
79+
80+
81+
def test_coords_to_transform_returns_none_for_int32_coords():
82+
"""int32 / int16 / uint also short-circuit -- the integer-dtype
83+
signal is "no georef", regardless of width."""
84+
for kind in (np.int32, np.int16, np.uint32):
85+
da = xr.DataArray(
86+
np.zeros((4, 5), dtype=np.float32),
87+
dims=['y', 'x'],
88+
coords={
89+
'y': np.arange(4, dtype=kind),
90+
'x': np.arange(5, dtype=kind),
91+
},
92+
)
93+
assert _coords_to_transform(da) is None, (
94+
f"expected None for {kind.__name__} coords")
95+
96+
97+
def test_coords_to_transform_float_coords_unchanged():
98+
"""Float coords still produce a GeoTransform (sanity baseline)."""
99+
da = xr.DataArray(
100+
np.zeros((4, 5), dtype=np.float32),
101+
dims=['y', 'x'],
102+
coords={
103+
'y': np.array([0.5, 1.5, 2.5, 3.5]),
104+
'x': np.array([10.0, 20.0, 30.0, 40.0, 50.0]),
105+
},
106+
)
107+
gt = _coords_to_transform(da)
108+
assert gt is not None
109+
assert gt.pixel_width == pytest.approx(10.0)
110+
assert gt.pixel_height == pytest.approx(1.0)
111+
112+
113+
def test_coords_to_transform_3d_yxband_int_y_returns_none():
114+
"""3D (y, x, band) with int y coords also short-circuits.
115+
116+
The helper picks the y/x dims (filtering out 'band'); the integer
117+
check must apply to those, not to the band axis.
118+
"""
119+
da = xr.DataArray(
120+
np.zeros((4, 5, 3), dtype=np.float32),
121+
dims=['y', 'x', 'band'],
122+
coords={
123+
'y': np.arange(4, dtype=np.int64),
124+
'x': np.arange(5, dtype=np.int64),
125+
'band': np.arange(3),
126+
},
127+
)
128+
assert _coords_to_transform(da) is None
129+
130+
131+
# ---------------------------------------------------------------------------
132+
# Round-trip tests through to_geotiff
133+
# ---------------------------------------------------------------------------
134+
135+
136+
def _make_no_georef_tiff(path, shape=(4, 5), seed=1949):
137+
"""Write a TIFF with no GeoTIFF tags."""
138+
arr = np.random.default_rng(seed=seed).random(shape).astype(np.float32)
139+
tifffile.imwrite(
140+
path, arr, photometric='minisblack', planarconfig='contig',
141+
metadata=None,
142+
)
143+
return arr
144+
145+
146+
def test_round_trip_preserves_int_coords_eager(tmp_path):
147+
"""Eager numpy round-trip: y/x coord dtype stays int64 and no
148+
transform attr is injected."""
149+
src = str(tmp_path / "no_georef_src_1949.tif")
150+
_make_no_georef_tiff(src)
151+
152+
da = open_geotiff(src)
153+
assert da.coords['y'].dtype == np.int64
154+
assert da.coords['x'].dtype == np.int64
155+
assert 'transform' not in da.attrs
156+
157+
out = str(tmp_path / "no_georef_out_1949.tif")
158+
to_geotiff(da, out, compression='none', tiled=False)
159+
160+
da2 = open_geotiff(out)
161+
assert da2.coords['y'].dtype == np.int64, (
162+
f"round-trip flipped y coord dtype from int64 to "
163+
f"{da2.coords['y'].dtype}: {da2.coords['y'].values}")
164+
assert da2.coords['x'].dtype == np.int64
165+
assert 'transform' not in da2.attrs, (
166+
f"round-trip injected fake transform attr: "
167+
f"{da2.attrs.get('transform')}")
168+
np.testing.assert_array_equal(
169+
da2.coords['y'].values, da.coords['y'].values)
170+
np.testing.assert_array_equal(
171+
da2.coords['x'].values, da.coords['x'].values)
172+
173+
174+
def test_round_trip_preserves_int_coords_3d(tmp_path):
175+
"""Same round-trip on a 3D multi-band file."""
176+
src = str(tmp_path / "no_georef_src_3d_1949.tif")
177+
arr = np.random.default_rng(seed=1949).random(
178+
(4, 5, 3)).astype(np.float32)
179+
tifffile.imwrite(src, arr, photometric='minisblack',
180+
planarconfig='contig', metadata=None)
181+
182+
da = open_geotiff(src)
183+
assert da.coords['y'].dtype == np.int64
184+
assert da.coords['x'].dtype == np.int64
185+
assert 'transform' not in da.attrs
186+
187+
out = str(tmp_path / "no_georef_out_3d_1949.tif")
188+
to_geotiff(da, out, compression='none')
189+
190+
da2 = open_geotiff(out)
191+
assert da2.coords['y'].dtype == np.int64
192+
assert da2.coords['x'].dtype == np.int64
193+
assert 'transform' not in da2.attrs
194+
195+
196+
def test_round_trip_dask_streaming_preserves_int_coords(tmp_path):
197+
"""Same contract via the dask streaming writer path."""
198+
src = str(tmp_path / "no_georef_src_dask_1949.tif")
199+
arr = np.random.default_rng(seed=1949).random(
200+
(64, 64)).astype(np.float32)
201+
tifffile.imwrite(src, arr, photometric='minisblack',
202+
planarconfig='contig', tile=(32, 32),
203+
compression='deflate', metadata=None)
204+
205+
da_dk = open_geotiff(src, chunks=32)
206+
assert da_dk.coords['y'].dtype == np.int64
207+
assert da_dk.coords['x'].dtype == np.int64
208+
assert 'transform' not in da_dk.attrs
209+
210+
out = str(tmp_path / "no_georef_out_dask_1949.tif")
211+
to_geotiff(da_dk, out, compression='deflate', tile_size=32)
212+
213+
da_rt = open_geotiff(out)
214+
assert da_rt.coords['y'].dtype == np.int64
215+
assert da_rt.coords['x'].dtype == np.int64
216+
assert 'transform' not in da_rt.attrs
217+
218+
219+
def test_double_round_trip_stable(tmp_path):
220+
"""Two consecutive read -> write -> read cycles produce identical
221+
coord arrays and attrs sets."""
222+
src = str(tmp_path / "no_georef_double_1949.tif")
223+
_make_no_georef_tiff(src, shape=(6, 8))
224+
225+
da1 = open_geotiff(src)
226+
out1 = str(tmp_path / "no_georef_double_out1_1949.tif")
227+
to_geotiff(da1, out1, compression='none', tiled=False)
228+
229+
da2 = open_geotiff(out1)
230+
out2 = str(tmp_path / "no_georef_double_out2_1949.tif")
231+
to_geotiff(da2, out2, compression='none', tiled=False)
232+
233+
da3 = open_geotiff(out2)
234+
np.testing.assert_array_equal(
235+
da1.coords['y'].values, da3.coords['y'].values)
236+
np.testing.assert_array_equal(
237+
da1.coords['x'].values, da3.coords['x'].values)
238+
assert da3.coords['y'].dtype == da1.coords['y'].dtype
239+
assert da3.coords['x'].dtype == da1.coords['x'].dtype
240+
# ``transform`` should never appear in either attrs set
241+
assert 'transform' not in da1.attrs
242+
assert 'transform' not in da3.attrs
243+
244+
245+
def test_explicit_transform_attr_still_writes(tmp_path):
246+
"""When the user explicitly sets ``attrs['transform']`` on a
247+
DataArray with int coords (an odd combination, but allowed), the
248+
writer still honours that explicit value -- the fix only catches the
249+
implicit-from-int-coords path."""
250+
arr = np.zeros((4, 5), dtype=np.float32)
251+
da = xr.DataArray(
252+
arr, dims=['y', 'x'],
253+
coords={
254+
'y': np.arange(4, dtype=np.int64),
255+
'x': np.arange(5, dtype=np.int64),
256+
},
257+
attrs={'transform': (30.0, 0.0, 100000.0, 0.0, -30.0, 4000000.0),
258+
'crs': 32610},
259+
)
260+
out = str(tmp_path / "explicit_transform_1949.tif")
261+
to_geotiff(da, out, compression='none', tiled=False)
262+
da_rt = open_geotiff(out)
263+
assert 'transform' in da_rt.attrs
264+
assert da_rt.attrs['transform'][0] == 30.0
265+
assert da_rt.attrs['crs'] == 32610
266+
267+
268+
@_gpu_only
269+
def test_gpu_writer_preserves_int_coords(tmp_path):
270+
"""The GPU writer path goes through the same ``_coords_to_transform``
271+
fallback; the fix has to extend there too."""
272+
src = str(tmp_path / "no_georef_src_gpu_1949.tif")
273+
_make_no_georef_tiff(src, shape=(64, 64))
274+
275+
da_gpu = open_geotiff(src, gpu=True)
276+
assert da_gpu.coords['y'].dtype == np.int64
277+
assert da_gpu.coords['x'].dtype == np.int64
278+
assert 'transform' not in da_gpu.attrs
279+
280+
out = str(tmp_path / "no_georef_out_gpu_1949.tif")
281+
to_geotiff(da_gpu, out, compression='deflate')
282+
283+
da_rt = open_geotiff(out)
284+
assert da_rt.coords['y'].dtype == np.int64
285+
assert da_rt.coords['x'].dtype == np.int64
286+
assert 'transform' not in da_rt.attrs

0 commit comments

Comments
 (0)