Skip to content

Commit bfbb7f7

Browse files
committed
geotiff: refuse non-numeric nodata / _FillValue early (#1973)
attrs['_FillValue']='missing' (or attrs['nodata']='missing', or a non-numeric nodata= kwarg) used to crash deep inside the writer with ``ufunc 'isnan' not supported``. The TypeError did not name the offending attribute and gave no hint that nodata was the cause. Add _validate_nodata_arg in _validation.py and call it from to_geotiff, _write_vrt_tiled, and write_geotiff_gpu so the kwarg path is checked at the boundary. Tighten _resolve_nodata_attr to raise ValueError naming the bad attr when attrs['nodata'] or attrs['_FillValue'] is non-numeric. The rioxarray-style attrs['nodatavals'] branch keeps its existing skip-on-non-numeric behaviour (per-band tuples often carry placeholder entries from arbitrary upstream pipelines, so a single bad entry should not block writing).
1 parent a0a70aa commit bfbb7f7

5 files changed

Lines changed: 145 additions & 2 deletions

File tree

xrspatial/geotiff/_attrs.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,18 @@ def _resolve_nodata_attr(attrs: dict):
206206
"""
207207
nodata = attrs.get('nodata')
208208
if nodata is not None:
209+
try:
210+
float(nodata)
211+
except (TypeError, ValueError) as e:
212+
raise ValueError(
213+
f"attrs['nodata']={nodata!r} is not numeric "
214+
f"({type(nodata).__name__}). The writer needs a numeric "
215+
f"sentinel to compare against pixel values; passing a "
216+
f"non-numeric value would otherwise crash inside "
217+
f"``np.isnan`` with an opaque ufunc error. Drop the "
218+
f"attr, replace it with a numeric sentinel, or pass "
219+
f"``nodata=`` explicitly (issue #1973)."
220+
) from e
209221
return nodata
210222

211223
vals = attrs.get('nodatavals')
@@ -229,8 +241,16 @@ def _resolve_nodata_attr(attrs: dict):
229241
if fill is not None:
230242
try:
231243
ffv = float(fill)
232-
except (TypeError, ValueError):
233-
return fill # non-numeric -- pass through verbatim
244+
except (TypeError, ValueError) as e:
245+
raise ValueError(
246+
f"attrs['_FillValue']={fill!r} is not numeric "
247+
f"({type(fill).__name__}). The writer needs a numeric "
248+
f"sentinel to compare against pixel values; passing a "
249+
f"non-numeric value would otherwise crash inside "
250+
f"``np.isnan`` with an opaque ufunc error. Drop the "
251+
f"attr, replace it with a numeric sentinel, or pass "
252+
f"``nodata=`` explicitly (issue #1973)."
253+
) from e
234254
if np.isnan(ffv):
235255
return None
236256
return fill

xrspatial/geotiff/_validation.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,3 +230,29 @@ def _validate_predictor_sample_format(predictor, sample_format) -> None:
230230
f"pair, e.g. `gdal_translate -co PREDICTOR=2` for integers or "
231231
f"`-co PREDICTOR=1` to drop the predictor."
232232
)
233+
234+
235+
def _validate_nodata_arg(nodata) -> None:
236+
"""Reject non-numeric ``nodata=`` at the writer entry point (#1973).
237+
238+
``None`` (no sentinel) and ``bool`` (kept for backwards compat;
239+
rejected at the dtype layer if it does not match) are passed
240+
through. Anything else is run through ``float()``: success means
241+
the writer's downstream ``np.isnan(nodata)`` and integer-cast
242+
paths will not blow up. Failure raises ``ValueError`` with the
243+
offending repr, so users see ``nodata='missing'`` flagged at the
244+
boundary instead of an opaque ``ufunc 'isnan' not supported``
245+
TypeError from inside the writer.
246+
"""
247+
if nodata is None:
248+
return
249+
try:
250+
float(nodata)
251+
except (TypeError, ValueError) as e:
252+
raise ValueError(
253+
f"nodata must be numeric or None, got {nodata!r} "
254+
f"(type {type(nodata).__name__}). The writer compares it "
255+
f"against pixel values via ``np.isnan`` and casts it to "
256+
f"the array dtype; a non-numeric value would otherwise "
257+
f"crash inside NumPy with a ufunc TypeError."
258+
) from e

xrspatial/geotiff/_writers/eager.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
)
4343
from .._validation import (
4444
_validate_3d_writer_dims,
45+
_validate_nodata_arg,
4546
_validate_tile_size_arg,
4647
)
4748
from .._writer import write
@@ -267,6 +268,8 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
267268
if tiled:
268269
_validate_tile_size_arg(tile_size)
269270

271+
_validate_nodata_arg(nodata)
272+
270273
# Up-front validation: catch bad compression names before they reach
271274
# any of the deeper write paths (streaming, GPU, VRT, COG) where the
272275
# error surfaces from _compression_tag with a less obvious traceback.
@@ -775,6 +778,7 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
775778
This enables streaming dask arrays to disk without materializing the
776779
full array in RAM.
777780
"""
781+
_validate_nodata_arg(nodata)
778782
# Validate compression_level against codec-specific range
779783
if compression_level is not None:
780784
level_range = _LEVEL_RANGES.get(compression.lower())

xrspatial/geotiff/_writers/gpu.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from .._runtime import GeoTIFFFallbackWarning
2828
from .._validation import (
2929
_validate_3d_writer_dims,
30+
_validate_nodata_arg,
3031
_validate_tile_size_arg,
3132
)
3233

@@ -259,6 +260,7 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
259260
# write_geotiff_gpu is always tiled, so validate tile_size here and
260261
# keep parity with the public to_geotiff entry point.
261262
_validate_tile_size_arg(tile_size)
263+
_validate_nodata_arg(nodata)
262264
if max_z_error < 0:
263265
raise ValueError(
264266
f"max_z_error must be >= 0, got {max_z_error}")
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
"""Refuse non-numeric ``nodata=`` / ``attrs['_FillValue']`` (#1973).
2+
3+
The writer compares the resolved nodata against pixel values via
4+
``np.isnan`` and casts it to the array dtype. A non-numeric value
5+
used to fall through ``_resolve_nodata_attr`` (returned verbatim) or
6+
the ``nodata=`` kwarg path and then crash inside NumPy with
7+
``ufunc 'isnan' not supported``. Both the entry point and the attr
8+
resolution path now refuse non-numeric values up front with a clear
9+
error.
10+
"""
11+
from __future__ import annotations
12+
13+
import io
14+
15+
import numpy as np
16+
import pytest
17+
import xarray as xr
18+
19+
from xrspatial.geotiff import to_geotiff
20+
from xrspatial.geotiff._attrs import _resolve_nodata_attr
21+
from xrspatial.geotiff._validation import _validate_nodata_arg
22+
23+
24+
def _nan_square():
25+
return xr.DataArray(
26+
np.full((4, 4), np.nan, dtype=np.float32),
27+
coords={'y': np.arange(4.0), 'x': np.arange(4.0)},
28+
dims=('y', 'x'),
29+
)
30+
31+
32+
@pytest.mark.parametrize("bad", ['missing', object(), [1, 2]])
33+
def test_validate_nodata_arg_rejects_non_numeric(bad):
34+
with pytest.raises(ValueError, match="nodata must be numeric"):
35+
_validate_nodata_arg(bad)
36+
37+
38+
@pytest.mark.parametrize("ok", [None, 0, -9999, 1.5, np.float32(-1), np.int64(0)])
39+
def test_validate_nodata_arg_accepts_numeric_and_none(ok):
40+
_validate_nodata_arg(ok)
41+
42+
43+
def test_resolve_nodata_attr_rejects_non_numeric_fillvalue():
44+
with pytest.raises(ValueError, match="_FillValue"):
45+
_resolve_nodata_attr({'_FillValue': 'missing'})
46+
47+
48+
def test_resolve_nodata_attr_rejects_non_numeric_nodata_attr():
49+
with pytest.raises(ValueError, match=r"attrs\['nodata'\]"):
50+
_resolve_nodata_attr({'nodata': 'missing'})
51+
52+
53+
def test_resolve_nodata_attr_skips_non_numeric_in_nodatavals():
54+
# nodatavals (rioxarray's per-band tuple) keeps its skip-on-non-numeric
55+
# behaviour: those values often come from arbitrary upstream pipelines
56+
# and a single bad entry should not block writing.
57+
assert _resolve_nodata_attr({'nodatavals': ('NaN ', -9999.0)}) == -9999.0
58+
59+
60+
def test_resolve_nodata_attr_still_accepts_numeric_fillvalue():
61+
assert _resolve_nodata_attr({'_FillValue': -9999}) == -9999
62+
63+
64+
def test_resolve_nodata_attr_returns_none_for_nan_fillvalue():
65+
assert _resolve_nodata_attr({'_FillValue': float('nan')}) is None
66+
67+
68+
def test_to_geotiff_rejects_non_numeric_nodata_kwarg():
69+
buf = io.BytesIO()
70+
with pytest.raises(ValueError, match="nodata must be numeric"):
71+
to_geotiff(_nan_square(), buf, nodata='missing')
72+
73+
74+
def test_to_geotiff_rejects_non_numeric_fillvalue_attr():
75+
da = _nan_square()
76+
da.attrs['_FillValue'] = 'missing'
77+
buf = io.BytesIO()
78+
with pytest.raises(ValueError, match="_FillValue"):
79+
to_geotiff(da, buf)
80+
81+
82+
def test_to_geotiff_vrt_path_rejects_non_numeric_nodata(tmp_path):
83+
vrt_path = str(tmp_path / "tmp_1973_vrt.vrt")
84+
with pytest.raises(ValueError, match="nodata must be numeric"):
85+
to_geotiff(_nan_square(), vrt_path, nodata='missing')
86+
87+
88+
def test_to_geotiff_accepts_numeric_nodata_kwarg():
89+
buf = io.BytesIO()
90+
to_geotiff(_nan_square(), buf, nodata=-9999)
91+
assert buf.getbuffer().nbytes > 0

0 commit comments

Comments
 (0)