|
68 | 68 | _populate_attrs_from_geo_info, |
69 | 69 | _resolve_nodata_attr, |
70 | 70 | ) |
| 71 | +from ._backends._gpu_helpers import ( |
| 72 | + _apply_nodata_mask_gpu, |
| 73 | + _apply_orientation_geo_info, |
| 74 | + _apply_orientation_gpu, |
| 75 | + _gpu_apply_window_band, |
| 76 | + _gpu_decode_single_band_tiles, |
| 77 | + _is_gpu_data, |
| 78 | +) |
71 | 79 | from ._crs import _resolve_crs_to_wkt, _wkt_to_epsg |
72 | 80 | from ._reader import read_to_array as _read_to_array |
73 | 81 | from ._runtime import ( |
@@ -503,76 +511,6 @@ def open_geotiff(source: str | BinaryIO, *, |
503 | 511 | return da |
504 | 512 |
|
505 | 513 |
|
506 | | -def _is_gpu_data(data) -> bool: |
507 | | - """Check if data is CuPy-backed (raw array or DataArray).""" |
508 | | - try: |
509 | | - import cupy |
510 | | - _cupy_type = cupy.ndarray |
511 | | - except ImportError: |
512 | | - return False |
513 | | - |
514 | | - if isinstance(data, xr.DataArray): |
515 | | - raw = data.data |
516 | | - if hasattr(raw, 'compute'): |
517 | | - meta = getattr(raw, '_meta', None) |
518 | | - return isinstance(meta, _cupy_type) |
519 | | - return isinstance(raw, _cupy_type) |
520 | | - return isinstance(data, _cupy_type) |
521 | | - |
522 | | - |
523 | | -def _apply_nodata_mask_gpu(arr_gpu, nodata): |
524 | | - """Replace nodata sentinel values with NaN on a CuPy array. |
525 | | -
|
526 | | - Mirrors the CPU eager path in :func:`open_geotiff` (lines around the |
527 | | - ``# Apply nodata mask`` comment). Float arrays get NaN substituted in |
528 | | - place of the sentinel; integer arrays are promoted to float64 with NaN |
529 | | - where the sentinel was. NaN nodata on a float array is a no-op (the |
530 | | - sentinel already matches NaN-aware code). |
531 | | -
|
532 | | - Returns the (possibly promoted, possibly nodata-masked) CuPy array. |
533 | | - The caller is responsible for setting ``attrs['nodata']`` so the |
534 | | - sentinel is still discoverable downstream. |
535 | | - """ |
536 | | - import cupy |
537 | | - |
538 | | - if nodata is None: |
539 | | - return arr_gpu |
540 | | - arr_dtype = np.dtype(str(arr_gpu.dtype)) |
541 | | - if arr_dtype.kind == 'f': |
542 | | - if not np.isnan(nodata): |
543 | | - sentinel = arr_dtype.type(nodata) |
544 | | - arr_gpu = cupy.where(arr_gpu == sentinel, |
545 | | - arr_dtype.type('nan'), arr_gpu) |
546 | | - return arr_gpu |
547 | | - if arr_dtype.kind in ('u', 'i'): |
548 | | - # Out-of-range sentinels (e.g. uint16 + GDAL_NODATA="-9999") cannot |
549 | | - # match any decoded pixel; skip the cast that would otherwise raise |
550 | | - # OverflowError. A non-finite sentinel ("NaN" / "Inf" GDAL_NODATA |
551 | | - # strings) also cannot match an integer pixel and would raise |
552 | | - # ValueError on ``int(nodata)``; gate on ``np.isfinite`` first to |
553 | | - # mirror ``_resolve_masked_fill`` in ``_reader.py`` (#1774). A |
554 | | - # fractional sentinel (e.g. ``"3.5"`` on a ``uint16`` file) also |
555 | | - # cannot match an integer pixel and ``int(3.5)`` would truncate |
556 | | - # to 3, silently masking a real pixel value; gate on |
557 | | - # ``float(nodata).is_integer()`` as well (mirrors the |
558 | | - # ``_writer.py`` / ``_vrt.py`` pattern used for #1564 / #1616). |
559 | | - # attrs['nodata'] is still set by the caller so the original |
560 | | - # sentinel survives a write round-trip. |
561 | | - if not (np.isfinite(nodata) and float(nodata).is_integer()): |
562 | | - return arr_gpu |
563 | | - nodata_int = int(nodata) |
564 | | - info = np.iinfo(arr_dtype) |
565 | | - if not (info.min <= nodata_int <= info.max): |
566 | | - return arr_gpu |
567 | | - sentinel = arr_dtype.type(nodata_int) |
568 | | - mask = arr_gpu == sentinel |
569 | | - if bool(mask.any().item()): |
570 | | - arr_gpu = arr_gpu.astype(cupy.float64) |
571 | | - arr_gpu = cupy.where(mask, cupy.float64('nan'), arr_gpu) |
572 | | - return arr_gpu |
573 | | - return arr_gpu |
574 | | - |
575 | | - |
576 | 514 | def to_geotiff(data: xr.DataArray | np.ndarray, |
577 | 515 | path: str | BinaryIO, *, |
578 | 516 | crs: int | str | None = None, |
@@ -1849,246 +1787,6 @@ def _read(http_meta): |
1849 | 1787 | return _read(http_meta_key) |
1850 | 1788 |
|
1851 | 1789 |
|
1852 | | -def _gpu_decode_single_band_tiles( |
1853 | | - source, lazy_data, offsets, byte_counts, |
1854 | | - tw, th, width, height, |
1855 | | - compression, predictor, file_dtype, |
1856 | | - *, |
1857 | | - byte_order: str, |
1858 | | - gpu: str, |
1859 | | -): |
1860 | | - """Decode one band's tile sequence into a 2-D ``(H, W)`` cupy array. |
1861 | | -
|
1862 | | - Helper for the ``PlanarConfiguration=2`` GPU path: the existing |
1863 | | - ``gpu_decode_tiles`` / ``gpu_decode_tiles_from_file`` kernels assume |
1864 | | - a single chunky tile sequence with ``bytes_per_pixel = itemsize * |
1865 | | - samples``. For planar=2 each band has its own list of tiles, so we |
1866 | | - invoke those kernels once per band with ``samples=1`` and stack the |
1867 | | - resulting 2-D arrays into ``(H, W, samples)`` afterwards. |
1868 | | -
|
1869 | | - Mirrors the two-stage GPU pipeline in ``read_geotiff_gpu`` -- GDS |
1870 | | - first, then CPU-extracted-tiles GPU decode. ``lazy_data`` is a |
1871 | | - zero-arg callable that returns the full file bytes; it caches its |
1872 | | - result so the first band that needs the stage-2 fallback pays the |
1873 | | - ``read_all()``, and subsequent bands reuse the same buffer. When |
1874 | | - every band's GDS path succeeds the file is never read at all. |
1875 | | - Sparse tiles are not expected here; the caller routes sparse files |
1876 | | - to the CPU path. |
1877 | | -
|
1878 | | - Auto-mode semantics: a stage-1 GDS failure warns and falls through |
1879 | | - to stage 2; a stage-2 failure warns and returns ``None`` so the |
1880 | | - caller can trigger a whole-image CPU fallback (a per-band CPU |
1881 | | - decode would require a single-band CPU path keyed off planar |
1882 | | - config, which doesn't exist). Strict mode re-raises from either |
1883 | | - stage. |
1884 | | - """ |
1885 | | - from ._gpu_decode import gpu_decode_tiles, gpu_decode_tiles_from_file |
1886 | | - |
1887 | | - arr_gpu = None |
1888 | | - try: |
1889 | | - arr_gpu = gpu_decode_tiles_from_file( |
1890 | | - source, offsets, byte_counts, |
1891 | | - tw, th, width, height, |
1892 | | - compression, predictor, file_dtype, 1, |
1893 | | - byte_order=byte_order, |
1894 | | - ) |
1895 | | - except Exception as e: |
1896 | | - if gpu == 'strict' or _geotiff_strict_mode(): |
1897 | | - raise |
1898 | | - warnings.warn( |
1899 | | - f"read_geotiff_gpu: GPU decode failed " |
1900 | | - f"({type(e).__name__}: {e}); falling back to CPU.", |
1901 | | - RuntimeWarning, |
1902 | | - stacklevel=3, |
1903 | | - ) |
1904 | | - arr_gpu = None |
1905 | | - |
1906 | | - if arr_gpu is None: |
1907 | | - shared_data = lazy_data() |
1908 | | - compressed_tiles = [ |
1909 | | - bytes(shared_data[offsets[i]:offsets[i] + byte_counts[i]]) |
1910 | | - for i in range(len(offsets)) |
1911 | | - ] |
1912 | | - try: |
1913 | | - arr_gpu = gpu_decode_tiles( |
1914 | | - compressed_tiles, |
1915 | | - tw, th, width, height, |
1916 | | - compression, predictor, file_dtype, 1, |
1917 | | - byte_order=byte_order, |
1918 | | - ) |
1919 | | - except Exception as e: |
1920 | | - if gpu == 'strict' or _geotiff_strict_mode(): |
1921 | | - raise |
1922 | | - warnings.warn( |
1923 | | - f"read_geotiff_gpu: GPU decode failed " |
1924 | | - f"({type(e).__name__}: {e}); falling back to CPU.", |
1925 | | - RuntimeWarning, |
1926 | | - stacklevel=3, |
1927 | | - ) |
1928 | | - return None |
1929 | | - |
1930 | | - if arr_gpu.ndim != 2 or arr_gpu.shape != (height, width): |
1931 | | - raise RuntimeError( |
1932 | | - f"single-band GPU tile decode produced shape " |
1933 | | - f"{arr_gpu.shape}, expected ({height}, {width})" |
1934 | | - ) |
1935 | | - return arr_gpu |
1936 | | - |
1937 | | - |
1938 | | -def _apply_orientation_gpu(arr_gpu, orientation: int): |
1939 | | - """cupy-side mirror of :func:`._reader._apply_orientation`. |
1940 | | -
|
1941 | | - The CPU reader applies the TIFF Orientation tag (274) post-decode so |
1942 | | - pixel (0, 0) is always the visual top-left. The GPU read path used |
1943 | | - to skip this remap, so reads of any file with orientation != 1 |
1944 | | - returned different pixel buffers than the CPU reader (#1540). |
1945 | | -
|
1946 | | - Same eight orientations the CPU helper handles. Operates on a cupy |
1947 | | - ndarray and returns a cupy ndarray; ``cupy.ascontiguousarray`` is |
1948 | | - applied so downstream views (DataArray.data) work without surprise |
1949 | | - re-strides on the GPU. |
1950 | | - """ |
1951 | | - import cupy |
1952 | | - if orientation == 1: |
1953 | | - return arr_gpu |
1954 | | - if orientation == 2: |
1955 | | - return cupy.ascontiguousarray(arr_gpu[:, ::-1]) |
1956 | | - if orientation == 3: |
1957 | | - return cupy.ascontiguousarray(arr_gpu[::-1, ::-1]) |
1958 | | - if orientation == 4: |
1959 | | - return cupy.ascontiguousarray(arr_gpu[::-1, :]) |
1960 | | - if arr_gpu.ndim == 3: |
1961 | | - if orientation == 5: |
1962 | | - return cupy.ascontiguousarray(arr_gpu.transpose(1, 0, 2)) |
1963 | | - if orientation == 6: |
1964 | | - return cupy.ascontiguousarray(arr_gpu.transpose(1, 0, 2)[:, ::-1]) |
1965 | | - if orientation == 7: |
1966 | | - return cupy.ascontiguousarray( |
1967 | | - arr_gpu.transpose(1, 0, 2)[::-1, ::-1]) |
1968 | | - if orientation == 8: |
1969 | | - return cupy.ascontiguousarray(arr_gpu.transpose(1, 0, 2)[::-1, :]) |
1970 | | - else: |
1971 | | - if orientation == 5: |
1972 | | - return cupy.ascontiguousarray(arr_gpu.T) |
1973 | | - if orientation == 6: |
1974 | | - return cupy.ascontiguousarray(arr_gpu.T[:, ::-1]) |
1975 | | - if orientation == 7: |
1976 | | - return cupy.ascontiguousarray(arr_gpu.T[::-1, ::-1]) |
1977 | | - if orientation == 8: |
1978 | | - return cupy.ascontiguousarray(arr_gpu.T[::-1, :]) |
1979 | | - raise ValueError( |
1980 | | - f"Invalid TIFF Orientation tag value: {orientation} " |
1981 | | - f"(must be 1-8 per TIFF 6.0)" |
1982 | | - ) |
1983 | | - |
1984 | | - |
1985 | | -def _apply_orientation_geo_info(geo_info, orientation: int, |
1986 | | - file_h: int, file_w: int): |
1987 | | - """Mirror the transform updates `_reader.read_to_array` does post-flip. |
1988 | | -
|
1989 | | - Centralised so both ``read_to_array`` (CPU) and ``read_geotiff_gpu`` |
1990 | | - (this module) update the GeoTransform consistently. Operates only |
1991 | | - on ``geo_info.transform``; the rest of the GeoInfo struct stays as |
1992 | | - parsed. |
1993 | | - """ |
1994 | | - if orientation == 1 or geo_info is None or geo_info.transform is None: |
1995 | | - return geo_info |
1996 | | - t = geo_info.transform |
1997 | | - if orientation in (2, 3, 4): |
1998 | | - if geo_info.raster_type == RASTER_PIXEL_IS_POINT: |
1999 | | - x_shift = file_w - 1 |
2000 | | - y_shift = file_h - 1 |
2001 | | - else: |
2002 | | - x_shift = file_w |
2003 | | - y_shift = file_h |
2004 | | - new_origin_x = t.origin_x |
2005 | | - new_origin_y = t.origin_y |
2006 | | - new_px_w = t.pixel_width |
2007 | | - new_px_h = t.pixel_height |
2008 | | - if orientation in (2, 3): |
2009 | | - new_origin_x = t.origin_x + x_shift * t.pixel_width |
2010 | | - new_px_w = -t.pixel_width |
2011 | | - if orientation in (3, 4): |
2012 | | - new_origin_y = t.origin_y + y_shift * t.pixel_height |
2013 | | - new_px_h = -t.pixel_height |
2014 | | - geo_info.transform = GeoTransform( |
2015 | | - origin_x=new_origin_x, |
2016 | | - origin_y=new_origin_y, |
2017 | | - pixel_width=new_px_w, |
2018 | | - pixel_height=new_px_h, |
2019 | | - ) |
2020 | | - elif orientation in (5, 6, 7, 8): |
2021 | | - # Match the CPU reader's #1765 refusal: a pixel-size swap alone |
2022 | | - # cannot express the per-orientation origin shift plus rotation |
2023 | | - # these orientations require, so the x/y coords would be wrong. |
2024 | | - # ``has_georef`` is True for any file carrying ModelTransformation, |
2025 | | - # ModelPixelScale, or ModelTiepoint, with or without a CRS tag, so |
2026 | | - # gate on that flag rather than CRS presence. |
2027 | | - if getattr(geo_info, 'has_georef', False): |
2028 | | - raise NotImplementedError( |
2029 | | - f"TIFF Orientation {orientation} on a georeferenced file " |
2030 | | - f"requires a per-orientation origin shift plus a rotation " |
2031 | | - f"that the axis-aligned GeoTransform used here cannot " |
2032 | | - f"represent, so the returned x/y coords would be wrong. " |
2033 | | - f"Reproject the file with another tool (e.g. GDAL) or " |
2034 | | - f"strip the Orientation tag before reading. See issue " |
2035 | | - f"#1765." |
2036 | | - ) |
2037 | | - # Non-georeferenced file: swap pixel sizes to match the |
2038 | | - # transposed array shape. No geographic claim to violate. |
2039 | | - geo_info.transform = GeoTransform( |
2040 | | - origin_x=t.origin_x, |
2041 | | - origin_y=t.origin_y, |
2042 | | - pixel_width=t.pixel_height, |
2043 | | - pixel_height=t.pixel_width, |
2044 | | - ) |
2045 | | - return geo_info |
2046 | | - |
2047 | | - |
2048 | | -def _gpu_apply_window_band(arr_gpu, geo_info, *, window, band): |
2049 | | - """Slice a fully-decoded GPU array down to a window and/or band. |
2050 | | -
|
2051 | | - Used by ``read_geotiff_gpu`` to keep the public surface in line with |
2052 | | - ``open_geotiff`` and ``read_geotiff_dask``: callers can pass ``window`` |
2053 | | - and ``band``, and the returned DataArray covers exactly that subset. |
2054 | | -
|
2055 | | - The current implementation slices on device after the full-image GPU |
2056 | | - decode is complete. That preserves correctness but does no I/O |
2057 | | - savings -- a future PR can short-circuit tile decode for partial |
2058 | | - windows. For ``band`` selection, the savings are also post-decode |
2059 | | - because the planar=1 (chunky) tile assembly returns all bands in a |
2060 | | - single GPU buffer. |
2061 | | -
|
2062 | | - Returns ``(arr_gpu, coords)`` where ``coords`` is a dict with |
2063 | | - ``y`` / ``x`` numpy arrays sized to the output array. The caller is |
2064 | | - responsible for setting ``attrs['transform']`` to the windowed origin |
2065 | | - via ``_populate_attrs_from_geo_info(..., window=window)`` so the array |
2066 | | - and the transform agree. |
2067 | | - """ |
2068 | | - if window is not None: |
2069 | | - r0, c0, r1, c1 = window |
2070 | | - arr_gpu = arr_gpu[r0:r1, c0:c1] |
2071 | | - out_h = r1 - r0 |
2072 | | - out_w = c1 - c0 |
2073 | | - # Mirror the eager-numpy windowed coord computation in |
2074 | | - # open_geotiff so the GPU-windowed coords carry the same |
2075 | | - # absolute pixel-center values as the CPU path. For files |
2076 | | - # with no GeoTIFF tags (``has_georef=False``), fall back to |
2077 | | - # integer pixel coords matching ``_geo_to_coords`` (#1710). |
2078 | | - coords = _coords_from_geo_info( |
2079 | | - geo_info, out_h, out_w, window=window, |
2080 | | - ) |
2081 | | - else: |
2082 | | - out_h = arr_gpu.shape[0] |
2083 | | - out_w = arr_gpu.shape[1] |
2084 | | - coords = _geo_to_coords(geo_info, out_h, out_w) |
2085 | | - |
2086 | | - if band is not None and arr_gpu.ndim == 3: |
2087 | | - arr_gpu = arr_gpu[:, :, band] |
2088 | | - |
2089 | | - return arr_gpu, coords |
2090 | | - |
2091 | | - |
2092 | 1790 | def read_geotiff_gpu(source: str, *, |
2093 | 1791 | dtype: str | np.dtype | None = None, |
2094 | 1792 | overview_level: int | None = None, |
|
0 commit comments