|
| 1 | +"""Round-trip tests for issue #1949. |
| 2 | +
|
| 3 | +A TIFF with no GeoTIFF tags (no ``ModelPixelScale``, no |
| 4 | +``ModelTiepoint``, no GeoKeys) reads back with integer pixel coords |
| 5 | +``[0, 1, 2, ...]`` and ``int64`` dtype (the no-georef branch added in |
| 6 | +#1710 and #1753). |
| 7 | +
|
| 8 | +Before the fix, writing that DataArray back through ``to_geotiff`` |
| 9 | +silently injected a synthetic ``ModelPixelScale`` + ``ModelTiepoint`` |
| 10 | +because ``_coords_to_transform`` happily inferred a unit transform from |
| 11 | +the integer pixel coords. The next read then took the georef branch |
| 12 | +and the y/x dtype flipped from ``int64`` to ``float64`` with a |
| 13 | +``transform`` attr present. |
| 14 | +
|
| 15 | +The fix makes ``_coords_to_transform`` return ``None`` when either |
| 16 | +spatial coord array carries an integer dtype, so the no-georef contract |
| 17 | +survives an arbitrary number of read -> write -> read cycles across |
| 18 | +every writer path. |
| 19 | +""" |
| 20 | +from __future__ import annotations |
| 21 | + |
| 22 | +import importlib.util |
| 23 | +import os |
| 24 | +import tempfile |
| 25 | + |
| 26 | +import numpy as np |
| 27 | +import pytest |
| 28 | +import tifffile |
| 29 | +import xarray as xr |
| 30 | + |
| 31 | +from xrspatial.geotiff import _coords_to_transform, open_geotiff, to_geotiff |
| 32 | + |
| 33 | + |
| 34 | +def _gpu_available() -> bool: |
| 35 | + if importlib.util.find_spec("cupy") is None: |
| 36 | + return False |
| 37 | + try: |
| 38 | + import cupy |
| 39 | + return bool(cupy.cuda.is_available()) |
| 40 | + except Exception: |
| 41 | + return False |
| 42 | + |
| 43 | + |
| 44 | +_HAS_GPU = _gpu_available() |
| 45 | +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") |
| 46 | + |
| 47 | + |
| 48 | +# --------------------------------------------------------------------------- |
| 49 | +# Unit tests on _coords_to_transform |
| 50 | +# --------------------------------------------------------------------------- |
| 51 | + |
| 52 | + |
| 53 | +def test_coords_to_transform_returns_none_for_int64_y_coords(): |
| 54 | + """Integer y coords ``[0, 1, 2, ...]`` are the no-georef signal the |
| 55 | + read side emits; ``_coords_to_transform`` must not fabricate a |
| 56 | + transform from them.""" |
| 57 | + da = xr.DataArray( |
| 58 | + np.zeros((4, 5), dtype=np.float32), |
| 59 | + dims=['y', 'x'], |
| 60 | + coords={ |
| 61 | + 'y': np.arange(4, dtype=np.int64), |
| 62 | + 'x': np.arange(5, dtype=np.float64), |
| 63 | + }, |
| 64 | + ) |
| 65 | + assert _coords_to_transform(da) is None |
| 66 | + |
| 67 | + |
| 68 | +def test_coords_to_transform_returns_none_for_int64_x_coords(): |
| 69 | + """Same check on the other axis.""" |
| 70 | + da = xr.DataArray( |
| 71 | + np.zeros((4, 5), dtype=np.float32), |
| 72 | + dims=['y', 'x'], |
| 73 | + coords={ |
| 74 | + 'y': np.arange(4, dtype=np.float64), |
| 75 | + 'x': np.arange(5, dtype=np.int64), |
| 76 | + }, |
| 77 | + ) |
| 78 | + assert _coords_to_transform(da) is None |
| 79 | + |
| 80 | + |
| 81 | +def test_coords_to_transform_returns_none_for_int32_coords(): |
| 82 | + """int32 / int16 / uint also short-circuit -- the integer-dtype |
| 83 | + signal is "no georef", regardless of width.""" |
| 84 | + for kind in (np.int32, np.int16, np.uint32): |
| 85 | + da = xr.DataArray( |
| 86 | + np.zeros((4, 5), dtype=np.float32), |
| 87 | + dims=['y', 'x'], |
| 88 | + coords={ |
| 89 | + 'y': np.arange(4, dtype=kind), |
| 90 | + 'x': np.arange(5, dtype=kind), |
| 91 | + }, |
| 92 | + ) |
| 93 | + assert _coords_to_transform(da) is None, ( |
| 94 | + f"expected None for {kind.__name__} coords") |
| 95 | + |
| 96 | + |
| 97 | +def test_coords_to_transform_float_coords_unchanged(): |
| 98 | + """Float coords still produce a GeoTransform (sanity baseline).""" |
| 99 | + da = xr.DataArray( |
| 100 | + np.zeros((4, 5), dtype=np.float32), |
| 101 | + dims=['y', 'x'], |
| 102 | + coords={ |
| 103 | + 'y': np.array([0.5, 1.5, 2.5, 3.5]), |
| 104 | + 'x': np.array([10.0, 20.0, 30.0, 40.0, 50.0]), |
| 105 | + }, |
| 106 | + ) |
| 107 | + gt = _coords_to_transform(da) |
| 108 | + assert gt is not None |
| 109 | + assert gt.pixel_width == pytest.approx(10.0) |
| 110 | + assert gt.pixel_height == pytest.approx(1.0) |
| 111 | + |
| 112 | + |
| 113 | +def test_coords_to_transform_3d_yxband_int_y_returns_none(): |
| 114 | + """3D (y, x, band) with int y coords also short-circuits. |
| 115 | +
|
| 116 | + The helper picks the y/x dims (filtering out 'band'); the integer |
| 117 | + check must apply to those, not to the band axis. |
| 118 | + """ |
| 119 | + da = xr.DataArray( |
| 120 | + np.zeros((4, 5, 3), dtype=np.float32), |
| 121 | + dims=['y', 'x', 'band'], |
| 122 | + coords={ |
| 123 | + 'y': np.arange(4, dtype=np.int64), |
| 124 | + 'x': np.arange(5, dtype=np.int64), |
| 125 | + 'band': np.arange(3), |
| 126 | + }, |
| 127 | + ) |
| 128 | + assert _coords_to_transform(da) is None |
| 129 | + |
| 130 | + |
| 131 | +# --------------------------------------------------------------------------- |
| 132 | +# Round-trip tests through to_geotiff |
| 133 | +# --------------------------------------------------------------------------- |
| 134 | + |
| 135 | + |
| 136 | +def _make_no_georef_tiff(path, shape=(4, 5), seed=1949): |
| 137 | + """Write a TIFF with no GeoTIFF tags.""" |
| 138 | + arr = np.random.default_rng(seed=seed).random(shape).astype(np.float32) |
| 139 | + tifffile.imwrite( |
| 140 | + path, arr, photometric='minisblack', planarconfig='contig', |
| 141 | + metadata=None, |
| 142 | + ) |
| 143 | + return arr |
| 144 | + |
| 145 | + |
| 146 | +def test_round_trip_preserves_int_coords_eager(tmp_path): |
| 147 | + """Eager numpy round-trip: y/x coord dtype stays int64 and no |
| 148 | + transform attr is injected.""" |
| 149 | + src = str(tmp_path / "no_georef_src_1949.tif") |
| 150 | + _make_no_georef_tiff(src) |
| 151 | + |
| 152 | + da = open_geotiff(src) |
| 153 | + assert da.coords['y'].dtype == np.int64 |
| 154 | + assert da.coords['x'].dtype == np.int64 |
| 155 | + assert 'transform' not in da.attrs |
| 156 | + |
| 157 | + out = str(tmp_path / "no_georef_out_1949.tif") |
| 158 | + to_geotiff(da, out, compression='none', tiled=False) |
| 159 | + |
| 160 | + da2 = open_geotiff(out) |
| 161 | + assert da2.coords['y'].dtype == np.int64, ( |
| 162 | + f"round-trip flipped y coord dtype from int64 to " |
| 163 | + f"{da2.coords['y'].dtype}: {da2.coords['y'].values}") |
| 164 | + assert da2.coords['x'].dtype == np.int64 |
| 165 | + assert 'transform' not in da2.attrs, ( |
| 166 | + f"round-trip injected fake transform attr: " |
| 167 | + f"{da2.attrs.get('transform')}") |
| 168 | + np.testing.assert_array_equal( |
| 169 | + da2.coords['y'].values, da.coords['y'].values) |
| 170 | + np.testing.assert_array_equal( |
| 171 | + da2.coords['x'].values, da.coords['x'].values) |
| 172 | + |
| 173 | + |
| 174 | +def test_round_trip_preserves_int_coords_3d(tmp_path): |
| 175 | + """Same round-trip on a 3D multi-band file.""" |
| 176 | + src = str(tmp_path / "no_georef_src_3d_1949.tif") |
| 177 | + arr = np.random.default_rng(seed=1949).random( |
| 178 | + (4, 5, 3)).astype(np.float32) |
| 179 | + tifffile.imwrite(src, arr, photometric='minisblack', |
| 180 | + planarconfig='contig', metadata=None) |
| 181 | + |
| 182 | + da = open_geotiff(src) |
| 183 | + assert da.coords['y'].dtype == np.int64 |
| 184 | + assert da.coords['x'].dtype == np.int64 |
| 185 | + assert 'transform' not in da.attrs |
| 186 | + |
| 187 | + out = str(tmp_path / "no_georef_out_3d_1949.tif") |
| 188 | + to_geotiff(da, out, compression='none') |
| 189 | + |
| 190 | + da2 = open_geotiff(out) |
| 191 | + assert da2.coords['y'].dtype == np.int64 |
| 192 | + assert da2.coords['x'].dtype == np.int64 |
| 193 | + assert 'transform' not in da2.attrs |
| 194 | + |
| 195 | + |
| 196 | +def test_round_trip_dask_streaming_preserves_int_coords(tmp_path): |
| 197 | + """Same contract via the dask streaming writer path.""" |
| 198 | + src = str(tmp_path / "no_georef_src_dask_1949.tif") |
| 199 | + arr = np.random.default_rng(seed=1949).random( |
| 200 | + (64, 64)).astype(np.float32) |
| 201 | + tifffile.imwrite(src, arr, photometric='minisblack', |
| 202 | + planarconfig='contig', tile=(32, 32), |
| 203 | + compression='deflate', metadata=None) |
| 204 | + |
| 205 | + da_dk = open_geotiff(src, chunks=32) |
| 206 | + assert da_dk.coords['y'].dtype == np.int64 |
| 207 | + assert da_dk.coords['x'].dtype == np.int64 |
| 208 | + assert 'transform' not in da_dk.attrs |
| 209 | + |
| 210 | + out = str(tmp_path / "no_georef_out_dask_1949.tif") |
| 211 | + to_geotiff(da_dk, out, compression='deflate', tile_size=32) |
| 212 | + |
| 213 | + da_rt = open_geotiff(out) |
| 214 | + assert da_rt.coords['y'].dtype == np.int64 |
| 215 | + assert da_rt.coords['x'].dtype == np.int64 |
| 216 | + assert 'transform' not in da_rt.attrs |
| 217 | + |
| 218 | + |
| 219 | +def test_double_round_trip_stable(tmp_path): |
| 220 | + """Two consecutive read -> write -> read cycles produce identical |
| 221 | + coord arrays and attrs sets.""" |
| 222 | + src = str(tmp_path / "no_georef_double_1949.tif") |
| 223 | + _make_no_georef_tiff(src, shape=(6, 8)) |
| 224 | + |
| 225 | + da1 = open_geotiff(src) |
| 226 | + out1 = str(tmp_path / "no_georef_double_out1_1949.tif") |
| 227 | + to_geotiff(da1, out1, compression='none', tiled=False) |
| 228 | + |
| 229 | + da2 = open_geotiff(out1) |
| 230 | + out2 = str(tmp_path / "no_georef_double_out2_1949.tif") |
| 231 | + to_geotiff(da2, out2, compression='none', tiled=False) |
| 232 | + |
| 233 | + da3 = open_geotiff(out2) |
| 234 | + np.testing.assert_array_equal( |
| 235 | + da1.coords['y'].values, da3.coords['y'].values) |
| 236 | + np.testing.assert_array_equal( |
| 237 | + da1.coords['x'].values, da3.coords['x'].values) |
| 238 | + assert da3.coords['y'].dtype == da1.coords['y'].dtype |
| 239 | + assert da3.coords['x'].dtype == da1.coords['x'].dtype |
| 240 | + # ``transform`` should never appear in either attrs set |
| 241 | + assert 'transform' not in da1.attrs |
| 242 | + assert 'transform' not in da3.attrs |
| 243 | + |
| 244 | + |
| 245 | +def test_explicit_transform_attr_still_writes(tmp_path): |
| 246 | + """When the user explicitly sets ``attrs['transform']`` on a |
| 247 | + DataArray with int coords (an odd combination, but allowed), the |
| 248 | + writer still honours that explicit value -- the fix only catches the |
| 249 | + implicit-from-int-coords path.""" |
| 250 | + arr = np.zeros((4, 5), dtype=np.float32) |
| 251 | + da = xr.DataArray( |
| 252 | + arr, dims=['y', 'x'], |
| 253 | + coords={ |
| 254 | + 'y': np.arange(4, dtype=np.int64), |
| 255 | + 'x': np.arange(5, dtype=np.int64), |
| 256 | + }, |
| 257 | + attrs={'transform': (30.0, 0.0, 100000.0, 0.0, -30.0, 4000000.0), |
| 258 | + 'crs': 32610}, |
| 259 | + ) |
| 260 | + out = str(tmp_path / "explicit_transform_1949.tif") |
| 261 | + to_geotiff(da, out, compression='none', tiled=False) |
| 262 | + da_rt = open_geotiff(out) |
| 263 | + assert 'transform' in da_rt.attrs |
| 264 | + assert da_rt.attrs['transform'][0] == 30.0 |
| 265 | + assert da_rt.attrs['crs'] == 32610 |
| 266 | + |
| 267 | + |
| 268 | +@_gpu_only |
| 269 | +def test_gpu_writer_preserves_int_coords(tmp_path): |
| 270 | + """The GPU writer path goes through the same ``_coords_to_transform`` |
| 271 | + fallback; the fix has to extend there too.""" |
| 272 | + src = str(tmp_path / "no_georef_src_gpu_1949.tif") |
| 273 | + _make_no_georef_tiff(src, shape=(64, 64)) |
| 274 | + |
| 275 | + da_gpu = open_geotiff(src, gpu=True) |
| 276 | + assert da_gpu.coords['y'].dtype == np.int64 |
| 277 | + assert da_gpu.coords['x'].dtype == np.int64 |
| 278 | + assert 'transform' not in da_gpu.attrs |
| 279 | + |
| 280 | + out = str(tmp_path / "no_georef_out_gpu_1949.tif") |
| 281 | + to_geotiff(da_gpu, out, compression='deflate') |
| 282 | + |
| 283 | + da_rt = open_geotiff(out) |
| 284 | + assert da_rt.coords['y'].dtype == np.int64 |
| 285 | + assert da_rt.coords['x'].dtype == np.int64 |
| 286 | + assert 'transform' not in da_rt.attrs |
0 commit comments