Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 69 additions & 20 deletions xrspatial/rasterize.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,31 @@ def _merge_overwrite(pixel, props, is_first):

@ngjit
def _merge_max(pixel, props, is_first):
if is_first or props[0] > pixel:
return props[0]
val = props[0]
# NaN burn poisons max regardless of order. Without this branch the
# outcome would depend on whether the NaN burn happened to land before
# or after the finite burns ('NaN > finite' is False, so a NaN burn
# arriving second is silently dropped). Issue #2255.
if val != val:
return val
if is_first:
return val
# If pixel is already NaN from an earlier burn, 'val > NaN' is False
# below and we fall through to 'return pixel', keeping NaN sticky.
if val > pixel:
return val
return pixel


@ngjit
def _merge_min(pixel, props, is_first):
if is_first or props[0] < pixel:
return props[0]
val = props[0]
if val != val:
return val
if is_first:
return val
if val < pixel:
return val
return pixel


Expand Down Expand Up @@ -1318,10 +1334,25 @@ def _apply_merge_gpu(out, written, order, r, c, props, new_idx):
cuda.atomic.add(out, (r, c), 1.0)
cuda.atomic.max(order, (r, c), new_idx)
elif atomic_mode == 'min':
cuda.atomic.min(out, (r, c), props[0])
val = props[0]
# CUDA atomicMin uses a < comparison, which returns False for
# any NaN operand, so a NaN burn would silently leave the +inf
# initialiser in place. Track NaN burns in ``written`` (sized
# H*W int32 for min/max, see _gpu_init_buffers) and stamp NaN
# into ``out`` at finalize. Issue #2255.
if val == val:
cuda.atomic.min(out, (r, c), val)
else:
cuda.atomic.max(written, (r, c), 1)
cuda.atomic.max(order, (r, c), new_idx)
elif atomic_mode == 'max':
cuda.atomic.max(out, (r, c), props[0])
val = props[0]
# See min branch above; the -inf initialiser would otherwise
# win against any NaN burn. Issue #2255.
if val == val:
cuda.atomic.max(out, (r, c), val)
else:
cuda.atomic.max(written, (r, c), 1)
cuda.atomic.max(order, (r, c), new_idx)
elif atomic_mode == 'first':
# Pass 1: resolve winning input index per pixel.
Expand Down Expand Up @@ -1882,12 +1913,22 @@ def _gpu_init_buffers(height, width, fill, merge_name):
sentinel value used in ``order`` so the caller can blend ``fill``
back in afterwards.

The atomic kernels never consult ``written`` (the legacy
user-callable path is the only consumer), so atomic modes get a
placeholder ``(1, 1)`` buffer to satisfy the kernel signature
without spending ``H*W`` bytes on dead storage.
Most atomic kernels never consult ``written`` (the legacy
user-callable path is the only consumer of the H*W int8 mask), so
they get a placeholder ``(1, 1)`` buffer to satisfy the kernel
signature without spending ``H*W`` bytes on dead storage.

``min`` / ``max`` repurpose ``written`` as a NaN-burn mask: the
+/-inf initialisers used by the atomic min/max kernels would
otherwise outrank any NaN burn ('NaN > finite' is False), silently
dropping NaN-valued geometries. The mask records which pixels saw
a NaN burn so finalize can stamp NaN back in. Issue #2255.
``cuda.atomic.max`` requires int32 / int64 / uint32 / uint64 (int8
is not supported), hence the dtype bump for these two modes.
"""
if merge_name in _GPU_ATOMIC_MERGES:
if merge_name in ('min', 'max'):
written = cupy.zeros((height, width), dtype=cupy.int32)
elif merge_name in _GPU_ATOMIC_MERGES:
written = cupy.zeros((1, 1), dtype=cupy.int8)
else:
written = cupy.zeros((height, width), dtype=cupy.int8)
Expand Down Expand Up @@ -1924,11 +1965,16 @@ def _gpu_init_buffers(height, width, fill, merge_name):
return out, written, order, order_sentinel


def _gpu_finalize_buffers(out, order, order_sentinel, fill, merge_name):
def _gpu_finalize_buffers(out, written, order, order_sentinel, fill,
merge_name):
"""Apply the post-pass blend that restores ``fill`` for untouched
pixels (sum/count/min/max/first/last) and replaces the +/- inf
initialisers used by min/max.

For ``min`` / ``max``, also stamps NaN into pixels that received at
least one NaN burn -- ``written`` is repurposed as the NaN-mask for
those modes (see ``_gpu_init_buffers``). Issue #2255.

Returns ``out`` (modified in-place where convenient).
"""
if merge_name is None or order_sentinel is None:
Expand All @@ -1937,10 +1983,13 @@ def _gpu_finalize_buffers(out, order, order_sentinel, fill, merge_name):
if merge_name in ('sum', 'count'):
# Pixels with no contribution should show ``fill``, not 0.
out = cupy.where(touched, out, cupy.float64(fill))
elif merge_name == 'min':
# +inf initialiser leaks into untouched pixels.
out = cupy.where(touched, out, cupy.float64(fill))
elif merge_name == 'max':
elif merge_name in ('min', 'max'):
# Any pixel that saw a NaN burn must show NaN, otherwise the
# +/-inf initialiser would leak through (or the finite max/min
# of the non-NaN burns would silently win). Issue #2255.
nan_burned = written > 0
out = cupy.where(nan_burned, cupy.float64(cupy.nan), out)
# +/-inf initialiser leaks into untouched pixels; restore fill.
out = cupy.where(touched, out, cupy.float64(fill))
# first/last already initialise ``out`` to ``fill``; pass-2 only
# writes for touched pixels, so no blend is needed.
Expand Down Expand Up @@ -2069,8 +2118,8 @@ def _launch_phase(k_scan, k_lines, k_boundary, k_points):
kernels['burn_lines_supercover_pass2'],
kernels['burn_points_pass2'])

final_out = _gpu_finalize_buffers(out, order, order_sentinel, fill,
atomic_mode)
final_out = _gpu_finalize_buffers(out, written, order, order_sentinel,
fill, atomic_mode)
return final_out.astype(dtype)


Expand Down Expand Up @@ -2531,8 +2580,8 @@ def _launch_phase(k_scan, k_lines, k_boundary, k_points):
kernels['burn_lines_supercover_pass2'],
kernels['burn_points_pass2'])

final_out = _gpu_finalize_buffers(out, order, order_sentinel, fill,
atomic_mode)
final_out = _gpu_finalize_buffers(out, written, order, order_sentinel,
fill, atomic_mode)
return final_out.astype(dtype)


Expand Down
87 changes: 4 additions & 83 deletions xrspatial/tests/test_rasterize_coverage_2026_05_21.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,10 @@ class TestNaNBurnValues:
leave the fill value in covered cells, which is a different
observable than emitting NaN there.

NOTE: the GPU ``max`` / ``min`` merges currently suppress NaN burn
values, asymmetric with the CPU IEEE-propagating behaviour pinned
here. See issue #2255. The
``test_nan_burn_overlaps_max_gpu_suppresses_nan`` and
``test_nan_burn_single_geom_max_gpu_returns_neg_inf`` tests below
pin the current (asymmetric) GPU observable so the divergence is
visible in CI; both will need to be inverted once the GPU kernels
are aligned with CPU semantics.
Issue #2255 aligned the cross-backend contract on strict NaN
propagation for ``max`` / ``min``: any NaN burn poisons the output
pixel regardless of order. See ``TestNaNPropagationAcrossBackends``
below for the cross-backend pins.
"""

@pytest.mark.parametrize('backend_name,kw', ALL_BACKENDS)
Expand Down Expand Up @@ -296,81 +292,6 @@ def test_nan_burn_line(self, backend_name, kw):
f"got per-row NaN counts {nan_rows}"
)

# CPU backends (numpy / dask+numpy) follow IEEE: max(NaN, 1.0) returns
# NaN because the comparison ``1.0 > NaN`` is False, so the kernel
# keeps the prior NaN pixel. GPU backends (cupy / dask+cupy) currently
# return 1.0: the GPU kernel initialises the output buffer to -inf for
# max merge and uses atomic_max which is NaN-suppressing under IEEE
# device semantics. See issue #2255. The CPU pin and the GPU
# asymmetry pin keep the divergence visible until the GPU kernels are
# aligned with the CPU is_first semantics.

@pytest.mark.parametrize('backend_name,kw', [
ALL_BACKENDS[0], # numpy
ALL_BACKENDS[2], # dask_numpy
])
def test_nan_burn_overlaps_max_cpu_propagates(self, backend_name, kw):
"""CPU: ``max(NaN, 1.0) == NaN``.

Pin the IEEE NaN-propagating behaviour so a future change that
switches the CPU max to ``fmax`` (NaN-suppressing) is visible as
a diff, not silent.
"""
r = rasterize(
[(box(0, 0, 10, 5), np.nan), (box(0, 0, 10, 5), 1.0)],
width=10, height=5, bounds=(0, 0, 10, 5),
fill=0, merge='max', **kw)
data = _materialise(r)
assert np.all(np.isnan(data))

@pytest.mark.parametrize('backend_name,kw', [
ALL_BACKENDS[1], # cupy
ALL_BACKENDS[3], # dask_cupy
])
def test_nan_burn_overlaps_max_gpu_suppresses_nan(self, backend_name, kw):
"""GPU: ``max(NaN, 1.0) == 1.0`` (NaN-suppressing).

Asymmetric with the CPU behaviour above; see issue #2255.
Pinning the current observable here so the GPU<->CPU divergence
is documented in CI until the GPU max kernel can be aligned with
IEEE NaN propagation.
"""
r = rasterize(
[(box(0, 0, 10, 5), np.nan), (box(0, 0, 10, 5), 1.0)],
width=10, height=5, bounds=(0, 0, 10, 5),
fill=0, merge='max', **kw)
data = _materialise(r)
# Current (asymmetric) GPU behaviour: NaN was suppressed and the
# finite 1.0 won. This pin will need to flip to ``np.all(isnan)``
# once the GPU max kernel is fixed to match the CPU IEEE
# propagation.
np.testing.assert_array_equal(data, np.ones_like(data))

@pytest.mark.parametrize('backend_name,kw', [
ALL_BACKENDS[1], # cupy
ALL_BACKENDS[3], # dask_cupy
])
def test_nan_burn_single_geom_max_gpu_returns_neg_inf(
self, backend_name, kw):
"""GPU: a single NaN-burn polygon under ``max`` leaves the
kernel's -inf init value in covered pixels.

On CPU the value would be NaN (the burn was the only write and
``_merge_max`` returns ``props[0]`` on first-write). On GPU the
buffer is initialised to -inf and atomicMax(-inf, NaN) keeps -inf
because NaN comparisons are False. See issue #2255. Pin so the
divergence is visible against the CPU behaviour pinned by
test_nan_burn_polygon[numpy].
"""
r = rasterize([(box(0, 0, 10, 5), np.nan)],
width=10, height=5, bounds=(0, 0, 10, 5),
fill=0, merge='max', **kw)
data = _materialise(r)
# The 5x10 covered region is -inf, not NaN, not 0.
assert np.all(np.isneginf(data)), (
f"{backend_name}: expected -inf in covered pixels, got {data}"
)


# ---------------------------------------------------------------------------
# Cat 1 MEDIUM -- Nested GeometryCollection
Expand Down
Loading
Loading