Skip to content

Commit c485361

Browse files
committed
geotiff: GPU + dask+GPU backend coverage for float16 read (#1941)
Issue #1941 added float16 auto-promotion on read and gated the GPU GDS path off for (bps=16, sf=float). The eager numpy and dask paths are covered by test_float16_read_1941.py; the cupy and dask+cupy paths had no targeted tests. A regression dropping the bps_mismatch fallback at _backends/gpu.py:357 or the float16 gate in _gds_chunk_path_available would silently mis-decode half-precision tiles and ship under existing CI. Adds 13 tests, all passing on a CUDA host: - read_geotiff_gpu on stripped + tiled (deflate, uncompressed) float16 - open_geotiff(gpu=True) dispatcher thread-through - windowed GPU reads on stripped + tiled float16 - open_geotiff(chunks=, gpu=True) and read_geotiff_gpu(chunks=) - _gds_chunk_path_available structural pin for (bps=16, sf=3) -> False plus a sanity check that float32 tiled files still allow GDS - cross-backend pixel-exact parity (numpy vs GPU, numpy vs dask+GPU, dask+numpy vs dask+GPU) - predictor=3 + float16 GPU round trip Mutation against bps_mismatch flipped 5 tests red; mutation against the GDS float16 gate flipped the structural test red.
1 parent 9ce0e60 commit c485361

1 file changed

Lines changed: 333 additions & 0 deletions

File tree

Lines changed: 333 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,333 @@
1+
"""GPU backend coverage for issue #1941 (float16 read).
2+
3+
#1941 added float16 auto-promotion on read by making
4+
``tiff_dtype_to_numpy(16, SAMPLE_FORMAT_FLOAT)`` return ``float32`` and
5+
adding the on-disk ``tiff_storage_dtype`` companion. The eager numpy and
6+
dask paths are covered by ``test_float16_read_1941.py``; this module
7+
closes the GPU and dask+GPU coverage gap.
8+
9+
A regression that:
10+
11+
* dropped the ``bps_mismatch`` stripped/odd-bps fallback at
12+
``_backends/gpu.py:357`` would route float16 stripped reads through
13+
the tiled GPU decoder and mis-decode the half-precision samples;
14+
* dropped the ``bps_first == 16 and sample_format == SAMPLE_FORMAT_FLOAT``
15+
early-out at ``_backends/gpu.py:791`` in ``_gds_chunk_path_available``
16+
would send tiled float16 chunked reads down the kvikIO GDS path and
17+
mis-stride the buffer;
18+
* dropped the entry at ``(16, SAMPLE_FORMAT_FLOAT) -> float32`` in
19+
``tiff_dtype_to_numpy`` would surface as ``ValueError("Unsupported
20+
BitsPerSample=16, SampleFormat=3")`` from the GPU read paths.
21+
22+
Every test ships through ``read_geotiff_gpu`` directly or through
23+
``open_geotiff(..., gpu=True)`` so the dispatcher path is also wired in.
24+
Hosts without cupy or a usable CUDA device skip the suite via the
25+
module-level ``pytestmark`` gate built on ``_gpu_available()``.
26+
"""
27+
from __future__ import annotations
28+
29+
import importlib.util
30+
import os
31+
32+
import numpy as np
33+
import pytest
34+
import xarray as xr
35+
36+
37+
def _gpu_available() -> bool:
38+
if importlib.util.find_spec("cupy") is None:
39+
return False
40+
try:
41+
import cupy
42+
43+
return bool(cupy.cuda.is_available())
44+
except Exception:
45+
return False
46+
47+
48+
# Probe once at import time; every test in this module shares the result.
_HAS_GPU = _gpu_available()

# Module-wide marker: skip the whole suite when no usable GPU was detected.
pytestmark = pytest.mark.skipif(
    not _HAS_GPU,
    reason="cupy + CUDA required for GPU float16 read tests",
)
52+
53+
54+
@pytest.fixture
def float16_stripped_tif(tmp_path):
    """Write a 4x4 uncompressed float16 TIFF and return ``(path, array)``.

    ``tifffile.imwrite`` is called without ``tile=``, which yields a
    stripped layout. The GPU reader is expected to see
    ``bps_mismatch=True`` for such a file (file_dtype.itemsize*8 == 32
    while bps == 16) and fall back to ``_read_to_array`` on CPU before
    copying the result to the device.

    Skips the requesting test when ``tifffile`` is not installed.
    """
    tifffile = pytest.importorskip("tifffile")
    rows = [
        [0.0, 1.0, 2.0, 3.0],
        [-1.0, -2.0, -3.0, -4.0],
        [0.5, 1.5, 2.5, 3.5],
        [100.0, 200.0, 300.0, 400.0],
    ]
    pixels = np.array(rows, dtype=np.float16)
    target = tmp_path / "f16_stripped.tif"
    tifffile.imwrite(str(target), pixels, compression=None)
    return target, pixels
74+
75+
76+
@pytest.fixture
def float16_tiled_tif(tmp_path):
    """Write a 16x16 deflate-compressed tiled float16 TIFF; return ``(path, array)``.

    The single 16x16 tile matches the image size. For this layout
    ``bps_mismatch`` is expected to short-circuit the tiled GPU decode
    and route through the CPU decoder, and ``_gds_chunk_path_available``
    is expected to return False for (bps=16, sf=3), keeping the GDS path
    off as well.

    Skips the requesting test when ``tifffile`` is not installed.
    """
    tifffile = pytest.importorskip("tifffile")
    side = 16
    pixels = np.arange(side * side, dtype=np.float16).reshape(side, side)
    target = tmp_path / "f16_tiled.tif"
    tifffile.imwrite(
        str(target),
        pixels,
        compression="deflate",
        tile=(side, side),
    )
    return target, pixels
91+
92+
93+
@pytest.fixture
def float16_tiled_uncompressed_tif(tmp_path):
    """Write a 16x16 uncompressed tiled float16 TIFF; return ``(path, array)``.

    Same pixel content and tiling as ``float16_tiled_tif`` but written
    with ``compression=None``, so the tile-decode path runs without a
    deflate codec call. A tile edge of 16 is the smallest tifffile
    accepts.

    Skips the requesting test when ``tifffile`` is not installed.
    """
    tifffile = pytest.importorskip("tifffile")
    side = 16
    pixels = np.arange(side * side, dtype=np.float16).reshape(side, side)
    target = tmp_path / "f16_tiled_none.tif"
    tifffile.imwrite(
        str(target),
        pixels,
        compression=None,
        tile=(side, side),
    )
    return target, pixels
107+
108+
109+
class TestEagerGPUReadFloat16:
    """Eager GPU reads of float16 files yield float32 with exact values.

    Exercises ``read_geotiff_gpu`` on stripped, tiled+deflate and tiled
    uncompressed inputs, plus the ``open_geotiff(gpu=True)`` dispatcher.
    """

    def test_read_geotiff_gpu_stripped_returns_float32(
        self, float16_stripped_tif
    ):
        from xrspatial.geotiff import read_geotiff_gpu

        tif_path, source = float16_stripped_tif
        on_device = read_geotiff_gpu(str(tif_path))
        assert on_device.dtype == np.float32, (
            f"GPU read of float16 must return float32, got {on_device.dtype}"
        )
        # ``.get()`` pulls the device buffer back to host for comparison.
        host = on_device.data.get()
        np.testing.assert_array_equal(host, source.astype(np.float32))

    def test_read_geotiff_gpu_tiled_returns_float32(
        self, float16_tiled_tif
    ):
        from xrspatial.geotiff import read_geotiff_gpu

        tif_path, source = float16_tiled_tif
        on_device = read_geotiff_gpu(str(tif_path))
        assert on_device.dtype == np.float32
        host = on_device.data.get()
        np.testing.assert_array_equal(host, source.astype(np.float32))

    def test_read_geotiff_gpu_tiled_uncompressed_returns_float32(
        self, float16_tiled_uncompressed_tif
    ):
        from xrspatial.geotiff import read_geotiff_gpu

        tif_path, source = float16_tiled_uncompressed_tif
        on_device = read_geotiff_gpu(str(tif_path))
        assert on_device.dtype == np.float32
        host = on_device.data.get()
        np.testing.assert_array_equal(host, source.astype(np.float32))

    def test_open_geotiff_gpu_dispatcher_float16(self, float16_tiled_tif):
        """``open_geotiff(gpu=True)`` dispatches correctly for float16."""
        from xrspatial.geotiff import open_geotiff

        tif_path, source = float16_tiled_tif
        on_device = open_geotiff(str(tif_path), gpu=True)
        assert on_device.dtype == np.float32
        host = on_device.data.get()
        np.testing.assert_array_equal(host, source.astype(np.float32))
156+
157+
158+
class TestGPUWindowedFloat16:
    """Windowed GPU reads of float16 files go through the bps_mismatch fallback.

    Each test requests the top-left corner of the fixture image and checks
    dtype, shape, and exact pixel values against the matching slice of
    the source array.
    """

    def test_read_geotiff_gpu_windowed_stripped(self, float16_stripped_tif):
        from xrspatial.geotiff import read_geotiff_gpu

        tif_path, source = float16_stripped_tif
        sub = read_geotiff_gpu(str(tif_path), window=(0, 0, 2, 2))
        assert sub.dtype == np.float32
        assert sub.shape == (2, 2)
        expected = source[:2, :2].astype(np.float32)
        np.testing.assert_array_equal(sub.data.get(), expected)

    def test_read_geotiff_gpu_windowed_tiled(self, float16_tiled_tif):
        from xrspatial.geotiff import read_geotiff_gpu

        tif_path, source = float16_tiled_tif
        sub = read_geotiff_gpu(str(tif_path), window=(0, 0, 8, 8))
        assert sub.dtype == np.float32
        assert sub.shape == (8, 8)
        expected = source[:8, :8].astype(np.float32)
        np.testing.assert_array_equal(sub.data.get(), expected)
180+
181+
182+
class TestDaskGPUFloat16:
    """Chunked (dask) GPU reads decode float16 sources to exact float32.

    Covers both entry points that accept ``chunks=``:
    ``open_geotiff(chunks=, gpu=True)`` and ``read_geotiff_gpu(chunks=)``.
    """

    def test_dask_gpu_tiled_float16(self, float16_tiled_tif):
        from xrspatial.geotiff import open_geotiff

        tif_path, source = float16_tiled_tif
        lazy = open_geotiff(str(tif_path), chunks=8, gpu=True)
        assert lazy.dtype == np.float32, (
            f"dask+GPU read of float16 must return float32, got {lazy.dtype}"
        )
        # Under dask+cupy, ``.compute()`` yields a cupy-backed DataArray;
        # ``.data.get()`` then copies it to host memory for comparison.
        materialised = lazy.compute()
        host = materialised.data.get()
        np.testing.assert_array_equal(host, source.astype(np.float32))

    def test_read_geotiff_gpu_chunks_kwarg_float16(self, float16_tiled_tif):
        """``read_geotiff_gpu(chunks=)`` also routes correctly."""
        from xrspatial.geotiff import read_geotiff_gpu

        tif_path, source = float16_tiled_tif
        lazy = read_geotiff_gpu(str(tif_path), chunks=8)
        assert lazy.dtype == np.float32
        materialised = lazy.compute()
        host = materialised.data.get()
        np.testing.assert_array_equal(host, source.astype(np.float32))
209+
210+
211+
class TestGDSPathGatedOffForFloat16:
    """``_gds_chunk_path_available`` returns False for (bps=16, sf=3).

    Structural test of the gate added in #1941 that keeps the KvikIO GDS
    chunked path away from half-precision tiles. Losing the float16
    guard would silently corrupt every chunked GPU read of a float16
    source; widening it would disable GDS for float32 tiled files.
    """

    @staticmethod
    def _first_ifd(tif_path):
        """Read the file at *tif_path* and return its first parsed IFD."""
        from xrspatial.geotiff._header import parse_all_ifds, parse_header

        with open(str(tif_path), "rb") as fh:
            raw = fh.read()
        return parse_all_ifds(raw, parse_header(raw))[0]

    def test_gds_path_gated_off_for_float16(self, float16_tiled_tif):
        from xrspatial.geotiff._backends.gpu import _gds_chunk_path_available
        from xrspatial.geotiff._dtypes import SAMPLE_FORMAT_FLOAT

        tif_path, _ = float16_tiled_tif
        first_ifd = self._first_ifd(tif_path)

        # Sanity-check fixture: tiled, bps=16, sample_format=3 (float)
        assert first_ifd.is_tiled, "fixture sanity: tiled layout expected"
        declared_bps = first_ifd.bits_per_sample
        # bits_per_sample may be a per-sample tuple; compare the first entry.
        first_bps = (
            declared_bps[0] if isinstance(declared_bps, tuple)
            else declared_bps
        )
        assert first_bps == 16, "fixture sanity: bps=16 expected"
        assert first_ifd.sample_format == SAMPLE_FORMAT_FLOAT

        gds_ok = _gds_chunk_path_available(
            str(tif_path),
            first_ifd,
            has_sparse_tile=False,
            orientation=1,
        )
        assert gds_ok is False, (
            "_gds_chunk_path_available must return False for "
            "(bps=16, sf=float) so the GDS chunked path does not "
            "mis-decode half-precision tiles."
        )

    def test_gds_path_allowed_for_float32_tiled(self, tmp_path):
        """Sanity: GDS path remains allowed for a float32 tiled file.

        Pins that the float16 guard fires only on (bps=16, sf=float),
        not on every tiled float file. A regression widening the guard
        to all floats would silently disable the GDS path on every
        float32 tiled COG.
        """
        tifffile = pytest.importorskip("tifffile")
        pytest.importorskip("kvikio")

        pixels = np.arange(256, dtype=np.float32).reshape(16, 16)
        tif_path = tmp_path / "f32_tiled.tif"
        tifffile.imwrite(
            str(tif_path),
            pixels,
            compression="deflate",
            tile=(16, 16),
        )

        from xrspatial.geotiff._backends.gpu import _gds_chunk_path_available

        first_ifd = self._first_ifd(tif_path)

        gds_ok = _gds_chunk_path_available(
            str(tif_path),
            first_ifd,
            has_sparse_tile=False,
            orientation=1,
        )
        assert gds_ok is True, (
            "_gds_chunk_path_available must remain True for "
            "(bps=32, sf=float) tiled files so the kvikio GDS chunk "
            "path still applies."
        )
279+
280+
281+
class TestBackendParityFloat16:
    """All four backends agree pixel-exact on float16 input.

    The backends compared are eager numpy, eager GPU, dask+numpy and
    dask+GPU. Every test pins both the promoted dtype (float32) and the
    pixel values: ``np.testing.assert_array_equal`` does not compare
    dtypes by default, so the dtype check has to be explicit.
    """

    def test_eager_numpy_equals_gpu(self, float16_tiled_tif):
        from xrspatial.geotiff import open_geotiff

        path, _ = float16_tiled_tif
        cpu = open_geotiff(str(path))
        gpu = open_geotiff(str(path), gpu=True)

        assert cpu.dtype == gpu.dtype == np.float32
        np.testing.assert_array_equal(np.asarray(cpu), gpu.data.get())

    def test_eager_numpy_equals_dask_gpu(self, float16_tiled_tif):
        from xrspatial.geotiff import open_geotiff

        path, _ = float16_tiled_tif
        cpu = open_geotiff(str(path))
        dask_gpu = open_geotiff(str(path), chunks=8, gpu=True).compute()

        assert cpu.dtype == dask_gpu.dtype == np.float32
        np.testing.assert_array_equal(
            np.asarray(cpu), dask_gpu.data.get())

    def test_dask_numpy_equals_dask_gpu(self, float16_tiled_tif):
        from xrspatial.geotiff import open_geotiff, read_geotiff_dask

        path, _ = float16_tiled_tif
        dask_cpu = read_geotiff_dask(str(path), chunks=8).compute()
        dask_gpu = open_geotiff(str(path), chunks=8, gpu=True).compute()

        # Same dtype pin as the sibling tests; without it a backend that
        # returned equal values in another dtype would pass unnoticed.
        assert dask_cpu.dtype == dask_gpu.dtype == np.float32
        np.testing.assert_array_equal(
            np.asarray(dask_cpu), dask_gpu.data.get())
314+
315+
316+
class TestPredictor3Float16GPU:
    """Float16 files written with predictor=3 decode exactly on GPU."""

    def test_predictor3_float16_gpu_round_trip(self, tmp_path):
        tifffile = pytest.importorskip("tifffile")
        pytest.importorskip("imagecodecs")  # required for predictor=3

        from xrspatial.geotiff import read_geotiff_gpu

        # 16 evenly spaced values in [-1, 1], rounded to half precision.
        ramp = np.linspace(-1.0, 1.0, 16)
        pixels = ramp.astype(np.float16).reshape(4, 4)
        tif_path = tmp_path / "pred3_f16.tif"
        tifffile.imwrite(
            str(tif_path),
            pixels,
            predictor=3,
            compression="deflate",
        )

        on_device = read_geotiff_gpu(str(tif_path))
        assert on_device.dtype == np.float32
        host = on_device.data.get()
        np.testing.assert_array_equal(host, pixels.astype(np.float32))

0 commit comments

Comments
 (0)