Skip to content

Commit 0ad97ff

Browse files
authored
geotiff: extract GPU helpers to _backends/_gpu_helpers.py (#1884) (#1894)
Step 6 of #1813's multi-PR refactor of __init__.py. Pure code motion; no public API change. First PR to create the _backends/ subpackage. Created xrspatial/geotiff/_backends/__init__.py (empty placeholder) and moved into _backends/_gpu_helpers.py: - _is_gpu_data: cupy-backed array / DataArray detection. - _apply_nodata_mask_gpu: cupy mirror of the CPU sentinel-to-NaN mask (handles float and integer arrays, with the same out-of-range / fractional / non-finite skips as _resolve_masked_fill in _reader.py). - _gpu_decode_single_band_tiles: two-stage GPU decode (GDS then CPU-extracted bytes) for the PlanarConfiguration=2 path. - _apply_orientation_gpu: cupy mirror of the CPU orientation-flip helper for the eight TIFF Orientation tag values. - _apply_orientation_geo_info: GeoTransform update after an orientation flip, with the #1765 refusal for georeferenced orientations 5-8. - _gpu_apply_window_band: post-decode window + band slice on device. Every helper lazy-imports cupy at call time so the module imports cleanly on numpy-only environments. __init__.py re-imports the helpers from _backends._gpu_helpers so existing callers (read_geotiff_gpu, to_geotiff's GPU branch, write_geotiff_gpu) keep their identical signatures. Leading-underscore module name (_gpu_helpers.py) keeps _backends/gpu.py free for the read_geotiff_gpu entry-point body in step 7. Net __init__.py change: 4271 -> 3969 lines (-302). _gpu_helpers.py is 335 lines. Verification: pixel-parity matrix from #1889 (192 cells, all GPU cells included), runtime identity (9), GPU byteswap (#1508), overview nodata inheritance (#1739) -- 230/230 pass. Refs #1813.
1 parent d0b4148 commit 0ad97ff

3 files changed

Lines changed: 351 additions & 310 deletions

File tree

xrspatial/geotiff/__init__.py

Lines changed: 8 additions & 310 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,14 @@
6868
_populate_attrs_from_geo_info,
6969
_resolve_nodata_attr,
7070
)
71+
from ._backends._gpu_helpers import (
72+
_apply_nodata_mask_gpu,
73+
_apply_orientation_geo_info,
74+
_apply_orientation_gpu,
75+
_gpu_apply_window_band,
76+
_gpu_decode_single_band_tiles,
77+
_is_gpu_data,
78+
)
7179
from ._crs import _resolve_crs_to_wkt, _wkt_to_epsg
7280
from ._reader import read_to_array as _read_to_array
7381
from ._runtime import (
@@ -503,76 +511,6 @@ def open_geotiff(source: str | BinaryIO, *,
503511
return da
504512

505513

506-
def _is_gpu_data(data) -> bool:
507-
"""Check if data is CuPy-backed (raw array or DataArray)."""
508-
try:
509-
import cupy
510-
_cupy_type = cupy.ndarray
511-
except ImportError:
512-
return False
513-
514-
if isinstance(data, xr.DataArray):
515-
raw = data.data
516-
if hasattr(raw, 'compute'):
517-
meta = getattr(raw, '_meta', None)
518-
return isinstance(meta, _cupy_type)
519-
return isinstance(raw, _cupy_type)
520-
return isinstance(data, _cupy_type)
521-
522-
523-
def _apply_nodata_mask_gpu(arr_gpu, nodata):
524-
"""Replace nodata sentinel values with NaN on a CuPy array.
525-
526-
Mirrors the CPU eager path in :func:`open_geotiff` (lines around the
527-
``# Apply nodata mask`` comment). Float arrays get NaN substituted in
528-
place of the sentinel; integer arrays are promoted to float64 with NaN
529-
where the sentinel was. NaN nodata on a float array is a no-op (the
530-
sentinel already matches NaN-aware code).
531-
532-
Returns the (possibly promoted, possibly nodata-masked) CuPy array.
533-
The caller is responsible for setting ``attrs['nodata']`` so the
534-
sentinel is still discoverable downstream.
535-
"""
536-
import cupy
537-
538-
if nodata is None:
539-
return arr_gpu
540-
arr_dtype = np.dtype(str(arr_gpu.dtype))
541-
if arr_dtype.kind == 'f':
542-
if not np.isnan(nodata):
543-
sentinel = arr_dtype.type(nodata)
544-
arr_gpu = cupy.where(arr_gpu == sentinel,
545-
arr_dtype.type('nan'), arr_gpu)
546-
return arr_gpu
547-
if arr_dtype.kind in ('u', 'i'):
548-
# Out-of-range sentinels (e.g. uint16 + GDAL_NODATA="-9999") cannot
549-
# match any decoded pixel; skip the cast that would otherwise raise
550-
# OverflowError. A non-finite sentinel ("NaN" / "Inf" GDAL_NODATA
551-
# strings) also cannot match an integer pixel and would raise
552-
# ValueError on ``int(nodata)``; gate on ``np.isfinite`` first to
553-
# mirror ``_resolve_masked_fill`` in ``_reader.py`` (#1774). A
554-
# fractional sentinel (e.g. ``"3.5"`` on a ``uint16`` file) also
555-
# cannot match an integer pixel and ``int(3.5)`` would truncate
556-
# to 3, silently masking a real pixel value; gate on
557-
# ``float(nodata).is_integer()`` as well (mirrors the
558-
# ``_writer.py`` / ``_vrt.py`` pattern used for #1564 / #1616).
559-
# attrs['nodata'] is still set by the caller so the original
560-
# sentinel survives a write round-trip.
561-
if not (np.isfinite(nodata) and float(nodata).is_integer()):
562-
return arr_gpu
563-
nodata_int = int(nodata)
564-
info = np.iinfo(arr_dtype)
565-
if not (info.min <= nodata_int <= info.max):
566-
return arr_gpu
567-
sentinel = arr_dtype.type(nodata_int)
568-
mask = arr_gpu == sentinel
569-
if bool(mask.any().item()):
570-
arr_gpu = arr_gpu.astype(cupy.float64)
571-
arr_gpu = cupy.where(mask, cupy.float64('nan'), arr_gpu)
572-
return arr_gpu
573-
return arr_gpu
574-
575-
576514
def to_geotiff(data: xr.DataArray | np.ndarray,
577515
path: str | BinaryIO, *,
578516
crs: int | str | None = None,
@@ -1849,246 +1787,6 @@ def _read(http_meta):
18491787
return _read(http_meta_key)
18501788

18511789

1852-
def _gpu_decode_single_band_tiles(
1853-
source, lazy_data, offsets, byte_counts,
1854-
tw, th, width, height,
1855-
compression, predictor, file_dtype,
1856-
*,
1857-
byte_order: str,
1858-
gpu: str,
1859-
):
1860-
"""Decode one band's tile sequence into a 2-D ``(H, W)`` cupy array.
1861-
1862-
Helper for the ``PlanarConfiguration=2`` GPU path: the existing
1863-
``gpu_decode_tiles`` / ``gpu_decode_tiles_from_file`` kernels assume
1864-
a single chunky tile sequence with ``bytes_per_pixel = itemsize *
1865-
samples``. For planar=2 each band has its own list of tiles, so we
1866-
invoke those kernels once per band with ``samples=1`` and stack the
1867-
resulting 2-D arrays into ``(H, W, samples)`` afterwards.
1868-
1869-
Mirrors the two-stage GPU pipeline in ``read_geotiff_gpu`` -- GDS
1870-
first, then CPU-extracted-tiles GPU decode. ``lazy_data`` is a
1871-
zero-arg callable that returns the full file bytes; it caches its
1872-
result so the first band that needs the stage-2 fallback pays the
1873-
``read_all()``, and subsequent bands reuse the same buffer. When
1874-
every band's GDS path succeeds the file is never read at all.
1875-
Sparse tiles are not expected here; the caller routes sparse files
1876-
to the CPU path.
1877-
1878-
Auto-mode semantics: a stage-1 GDS failure warns and falls through
1879-
to stage 2; a stage-2 failure warns and returns ``None`` so the
1880-
caller can trigger a whole-image CPU fallback (a per-band CPU
1881-
decode would require a single-band CPU path keyed off planar
1882-
config, which doesn't exist). Strict mode re-raises from either
1883-
stage.
1884-
"""
1885-
from ._gpu_decode import gpu_decode_tiles, gpu_decode_tiles_from_file
1886-
1887-
arr_gpu = None
1888-
try:
1889-
arr_gpu = gpu_decode_tiles_from_file(
1890-
source, offsets, byte_counts,
1891-
tw, th, width, height,
1892-
compression, predictor, file_dtype, 1,
1893-
byte_order=byte_order,
1894-
)
1895-
except Exception as e:
1896-
if gpu == 'strict' or _geotiff_strict_mode():
1897-
raise
1898-
warnings.warn(
1899-
f"read_geotiff_gpu: GPU decode failed "
1900-
f"({type(e).__name__}: {e}); falling back to CPU.",
1901-
RuntimeWarning,
1902-
stacklevel=3,
1903-
)
1904-
arr_gpu = None
1905-
1906-
if arr_gpu is None:
1907-
shared_data = lazy_data()
1908-
compressed_tiles = [
1909-
bytes(shared_data[offsets[i]:offsets[i] + byte_counts[i]])
1910-
for i in range(len(offsets))
1911-
]
1912-
try:
1913-
arr_gpu = gpu_decode_tiles(
1914-
compressed_tiles,
1915-
tw, th, width, height,
1916-
compression, predictor, file_dtype, 1,
1917-
byte_order=byte_order,
1918-
)
1919-
except Exception as e:
1920-
if gpu == 'strict' or _geotiff_strict_mode():
1921-
raise
1922-
warnings.warn(
1923-
f"read_geotiff_gpu: GPU decode failed "
1924-
f"({type(e).__name__}: {e}); falling back to CPU.",
1925-
RuntimeWarning,
1926-
stacklevel=3,
1927-
)
1928-
return None
1929-
1930-
if arr_gpu.ndim != 2 or arr_gpu.shape != (height, width):
1931-
raise RuntimeError(
1932-
f"single-band GPU tile decode produced shape "
1933-
f"{arr_gpu.shape}, expected ({height}, {width})"
1934-
)
1935-
return arr_gpu
1936-
1937-
1938-
def _apply_orientation_gpu(arr_gpu, orientation: int):
1939-
"""cupy-side mirror of :func:`._reader._apply_orientation`.
1940-
1941-
The CPU reader applies the TIFF Orientation tag (274) post-decode so
1942-
pixel (0, 0) is always the visual top-left. The GPU read path used
1943-
to skip this remap, so reads of any file with orientation != 1
1944-
returned different pixel buffers than the CPU reader (#1540).
1945-
1946-
Same eight orientations the CPU helper handles. Operates on a cupy
1947-
ndarray and returns a cupy ndarray; ``cupy.ascontiguousarray`` is
1948-
applied so downstream views (DataArray.data) work without surprise
1949-
re-strides on the GPU.
1950-
"""
1951-
import cupy
1952-
if orientation == 1:
1953-
return arr_gpu
1954-
if orientation == 2:
1955-
return cupy.ascontiguousarray(arr_gpu[:, ::-1])
1956-
if orientation == 3:
1957-
return cupy.ascontiguousarray(arr_gpu[::-1, ::-1])
1958-
if orientation == 4:
1959-
return cupy.ascontiguousarray(arr_gpu[::-1, :])
1960-
if arr_gpu.ndim == 3:
1961-
if orientation == 5:
1962-
return cupy.ascontiguousarray(arr_gpu.transpose(1, 0, 2))
1963-
if orientation == 6:
1964-
return cupy.ascontiguousarray(arr_gpu.transpose(1, 0, 2)[:, ::-1])
1965-
if orientation == 7:
1966-
return cupy.ascontiguousarray(
1967-
arr_gpu.transpose(1, 0, 2)[::-1, ::-1])
1968-
if orientation == 8:
1969-
return cupy.ascontiguousarray(arr_gpu.transpose(1, 0, 2)[::-1, :])
1970-
else:
1971-
if orientation == 5:
1972-
return cupy.ascontiguousarray(arr_gpu.T)
1973-
if orientation == 6:
1974-
return cupy.ascontiguousarray(arr_gpu.T[:, ::-1])
1975-
if orientation == 7:
1976-
return cupy.ascontiguousarray(arr_gpu.T[::-1, ::-1])
1977-
if orientation == 8:
1978-
return cupy.ascontiguousarray(arr_gpu.T[::-1, :])
1979-
raise ValueError(
1980-
f"Invalid TIFF Orientation tag value: {orientation} "
1981-
f"(must be 1-8 per TIFF 6.0)"
1982-
)
1983-
1984-
1985-
def _apply_orientation_geo_info(geo_info, orientation: int,
1986-
file_h: int, file_w: int):
1987-
"""Mirror the transform updates `_reader.read_to_array` does post-flip.
1988-
1989-
Centralised so both ``read_to_array`` (CPU) and ``read_geotiff_gpu``
1990-
(this module) update the GeoTransform consistently. Operates only
1991-
on ``geo_info.transform``; the rest of the GeoInfo struct stays as
1992-
parsed.
1993-
"""
1994-
if orientation == 1 or geo_info is None or geo_info.transform is None:
1995-
return geo_info
1996-
t = geo_info.transform
1997-
if orientation in (2, 3, 4):
1998-
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
1999-
x_shift = file_w - 1
2000-
y_shift = file_h - 1
2001-
else:
2002-
x_shift = file_w
2003-
y_shift = file_h
2004-
new_origin_x = t.origin_x
2005-
new_origin_y = t.origin_y
2006-
new_px_w = t.pixel_width
2007-
new_px_h = t.pixel_height
2008-
if orientation in (2, 3):
2009-
new_origin_x = t.origin_x + x_shift * t.pixel_width
2010-
new_px_w = -t.pixel_width
2011-
if orientation in (3, 4):
2012-
new_origin_y = t.origin_y + y_shift * t.pixel_height
2013-
new_px_h = -t.pixel_height
2014-
geo_info.transform = GeoTransform(
2015-
origin_x=new_origin_x,
2016-
origin_y=new_origin_y,
2017-
pixel_width=new_px_w,
2018-
pixel_height=new_px_h,
2019-
)
2020-
elif orientation in (5, 6, 7, 8):
2021-
# Match the CPU reader's #1765 refusal: a pixel-size swap alone
2022-
# cannot express the per-orientation origin shift plus rotation
2023-
# these orientations require, so the x/y coords would be wrong.
2024-
# ``has_georef`` is True for any file carrying ModelTransformation,
2025-
# ModelPixelScale, or ModelTiepoint, with or without a CRS tag, so
2026-
# gate on that flag rather than CRS presence.
2027-
if getattr(geo_info, 'has_georef', False):
2028-
raise NotImplementedError(
2029-
f"TIFF Orientation {orientation} on a georeferenced file "
2030-
f"requires a per-orientation origin shift plus a rotation "
2031-
f"that the axis-aligned GeoTransform used here cannot "
2032-
f"represent, so the returned x/y coords would be wrong. "
2033-
f"Reproject the file with another tool (e.g. GDAL) or "
2034-
f"strip the Orientation tag before reading. See issue "
2035-
f"#1765."
2036-
)
2037-
# Non-georeferenced file: swap pixel sizes to match the
2038-
# transposed array shape. No geographic claim to violate.
2039-
geo_info.transform = GeoTransform(
2040-
origin_x=t.origin_x,
2041-
origin_y=t.origin_y,
2042-
pixel_width=t.pixel_height,
2043-
pixel_height=t.pixel_width,
2044-
)
2045-
return geo_info
2046-
2047-
2048-
def _gpu_apply_window_band(arr_gpu, geo_info, *, window, band):
2049-
"""Slice a fully-decoded GPU array down to a window and/or band.
2050-
2051-
Used by ``read_geotiff_gpu`` to keep the public surface in line with
2052-
``open_geotiff`` and ``read_geotiff_dask``: callers can pass ``window``
2053-
and ``band``, and the returned DataArray covers exactly that subset.
2054-
2055-
The current implementation slices on device after the full-image GPU
2056-
decode is complete. That preserves correctness but does no I/O
2057-
savings -- a future PR can short-circuit tile decode for partial
2058-
windows. For ``band`` selection, the savings are also post-decode
2059-
because the planar=1 (chunky) tile assembly returns all bands in a
2060-
single GPU buffer.
2061-
2062-
Returns ``(arr_gpu, coords)`` where ``coords`` is a dict with
2063-
``y`` / ``x`` numpy arrays sized to the output array. The caller is
2064-
responsible for setting ``attrs['transform']`` to the windowed origin
2065-
via ``_populate_attrs_from_geo_info(..., window=window)`` so the array
2066-
and the transform agree.
2067-
"""
2068-
if window is not None:
2069-
r0, c0, r1, c1 = window
2070-
arr_gpu = arr_gpu[r0:r1, c0:c1]
2071-
out_h = r1 - r0
2072-
out_w = c1 - c0
2073-
# Mirror the eager-numpy windowed coord computation in
2074-
# open_geotiff so the GPU-windowed coords carry the same
2075-
# absolute pixel-center values as the CPU path. For files
2076-
# with no GeoTIFF tags (``has_georef=False``), fall back to
2077-
# integer pixel coords matching ``_geo_to_coords`` (#1710).
2078-
coords = _coords_from_geo_info(
2079-
geo_info, out_h, out_w, window=window,
2080-
)
2081-
else:
2082-
out_h = arr_gpu.shape[0]
2083-
out_w = arr_gpu.shape[1]
2084-
coords = _geo_to_coords(geo_info, out_h, out_w)
2085-
2086-
if band is not None and arr_gpu.ndim == 3:
2087-
arr_gpu = arr_gpu[:, :, band]
2088-
2089-
return arr_gpu, coords
2090-
2091-
20921790
def read_geotiff_gpu(source: str, *,
20931791
dtype: str | np.dtype | None = None,
20941792
overview_level: int | None = None,
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
"""Per-backend implementations for the geotiff read entry points.
2+
3+
Step 6 of issue #1813 creates this subpackage and adds
4+
``_gpu_helpers.py``. Later steps add ``gpu.py``, ``dask.py``, and
5+
``vrt.py`` for the backend entry-point bodies. The package
6+
``__init__`` stays empty so nothing leaks into ``xrspatial.geotiff``
7+
through implicit re-exports.
8+
"""

0 commit comments

Comments
 (0)