diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index 9ec4ab83..dfb4181c 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -845,6 +845,24 @@ def _read_once(): arr_gpu = np.iinfo(gpu_dtype).max - arr_gpu elif gpu_dtype.kind == 'f': arr_gpu = -arr_gpu + elif gpu_dtype.kind == 'i': + # Mirror the CPU reader's signed-int MinIsWhite + # rejection (issue #2278). Without this the pure-GPU + # decode path would silently return un-inverted + # signed pixels while the CPU decode path raises, + # breaking backend parity. + raise NotImplementedError( + f"Signed-integer MinIsWhite TIFFs are not " + f"supported on the GPU decode path either " + f"(issue #2278): Photometric=0 (MinIsWhite), " + f"SampleFormat={ifd.sample_format} (signed int), " + f"BitsPerSample={ifd.bits_per_sample}, " + f"dtype={gpu_dtype}. xrspatial has no " + f"semantically correct inversion for signed " + f"pixels here. Convert the file to MinIsBlack " + f"(Photometric=1) with another tool, or open it " + f"in an unsigned dtype." + ) if name is None: import os diff --git a/xrspatial/geotiff/_decode.py b/xrspatial/geotiff/_decode.py index 22124c26..7b577e7f 100644 --- a/xrspatial/geotiff/_decode.py +++ b/xrspatial/geotiff/_decode.py @@ -868,13 +868,33 @@ def _apply_orientation_with_geo( def _apply_photometric_miniswhite(arr: np.ndarray, ifd: IFD) -> np.ndarray: - """Apply TIFF MinIsWhite inversion for single-band grayscale images.""" + """Apply TIFF MinIsWhite inversion for single-band grayscale images. + + Signed-integer single-band MinIsWhite is rejected (issue #2278). The + reader used to pass these through unchanged, which round-tripped + inside xrspatial but produced files whose pixel values disagreed + with the on-disk Photometric tag against every other TIFF consumer + (GDAL, libtiff, ImageMagick). + """ if ifd.photometric != 0 or ifd.samples_per_pixel != 1: return arr if arr.dtype.kind == 'u': return np.iinfo(arr.dtype).max - arr if arr.dtype.kind == 'f': return -arr + if arr.dtype.kind == 'i': + raise NotImplementedError( + f"Signed-integer MinIsWhite TIFFs are not supported " + f"(issue #2278): Photometric=0 (MinIsWhite), " + f"SampleFormat={ifd.sample_format} (signed int), " + f"BitsPerSample={ifd.bits_per_sample}, dtype={arr.dtype}. " + f"The reader has no semantically correct inversion for " + f"signed pixels here, and passing them through unchanged " + f"would disagree with the on-disk Photometric tag against " + f"every standards-compliant TIFF reader (GDAL, libtiff, " + f"etc.). Convert the file to MinIsBlack (Photometric=1) " + f"with another tool, or open it in an unsigned dtype." + ) return arr diff --git a/xrspatial/geotiff/_encode.py b/xrspatial/geotiff/_encode.py index 387827cc..9e264e5b 100644 --- a/xrspatial/geotiff/_encode.py +++ b/xrspatial/geotiff/_encode.py @@ -131,8 +131,12 @@ def _apply_photometric_miniswhite_invert( See issue #1836. Returns the pre-inverted array (a new array) so that the reader's - inversion restores the original values. Multi-band data and signed - integer data pass through unchanged, matching the reader. + inversion restores the original values. Multi-band data passes + through unchanged. Signed-integer single-band MinIsWhite raises + ``NotImplementedError`` -- the previous passthrough produced files + whose pixel values disagreed with the on-disk Photometric tag + against every standards-compliant TIFF consumer (GDAL, libtiff). + See issue #2278. """ if resolved_photometric != 0 or samples_per_pixel != 1: return arr @@ -140,6 +144,23 @@ def _apply_photometric_miniswhite_invert( return np.iinfo(arr.dtype).max - arr if arr.dtype.kind == 'f': return -arr + if arr.dtype.kind == 'i': + # Defer the import to dodge any module-load cycle through + # ``_dtypes``. + from ._dtypes import numpy_to_tiff_dtype + bps, sf = numpy_to_tiff_dtype(arr.dtype) + raise NotImplementedError( + f"Writing signed-integer MinIsWhite TIFFs is not supported " + f"(issue #2278): Photometric=0 (MinIsWhite), " + f"SampleFormat={sf} (signed int), BitsPerSample={bps}, " + f"dtype={arr.dtype}. xrspatial has no semantically correct " + f"inversion for signed pixels here, and writing them " + f"un-inverted would produce a file whose pixels disagree " + f"with the Photometric tag against every standards-" + f"compliant TIFF reader (GDAL, libtiff, etc.). Cast to an " + f"unsigned dtype, or write with photometric='minisblack' " + f"/ 'auto'." + ) return arr diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index ea62a9f7..7aea77d6 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -234,7 +234,14 @@ def to_geotiff(data: xr.DataArray | np.ndarray, * ``'rgba'`` -- RGB with the 4th band tagged as unassociated alpha (TIFF ExtraSamples=2). Requires at least 4 bands. * ``'minisblack'`` or ``'miniswhite'`` -- grayscale; multi-band - extras tagged ``0``. + extras tagged ``0``. Signed-integer pixel types with + ``'miniswhite'`` are rejected with ``NotImplementedError`` -- + xrspatial has no semantically correct inversion for signed + MinIsWhite and the silent passthrough that used to happen + produced files that disagreed with the on-disk Photometric + tag against every standards-compliant TIFF reader (issue + #2278). Cast to an unsigned dtype or pass + ``photometric='minisblack'``. * An ``int`` -- written verbatim into Photometric for advanced callers (e.g. ``3`` for Palette, ``5`` for CMYK). diff --git a/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py b/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py index c6eca43c..f2837e75 100644 --- a/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py +++ b/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py @@ -74,15 +74,26 @@ def test_miniswhite_with_nodata_round_trip(tmp_path): np.testing.assert_allclose(out[0, [0, 2, 3]], arr[0, [0, 2, 3]]) -def test_miniswhite_int_passthrough(tmp_path): - """The reader does not invert signed integer MinIsWhite data, so the - writer must also pass it through unchanged. Otherwise the round-trip - would silently corrupt signed data.""" +def test_miniswhite_int_rejected_at_write_2278(tmp_path): + """Issue #2278: signed-integer MinIsWhite is rejected at write time. + + The writer used to leave signed pixels untouched, which round-tripped + inside xrspatial but produced a file whose pixel values disagreed + with the on-disk Photometric tag against every other TIFF consumer + (GDAL, libtiff, ImageMagick). The writer now refuses the combination + outright with a message that includes SampleFormat / BitsPerSample / + Photometric so callers can diagnose without digging into the spec. + """ arr = np.array([[-5, -1, 0, 1, 5]], dtype=np.int16) - path = tmp_path / 'i16_msw_1836.tif' - to_geotiff(_da(arr), str(path), photometric='miniswhite') - r = open_geotiff(str(path)) - np.testing.assert_array_equal(np.asarray(r.values), arr) + path = tmp_path / 'i16_msw_2278.tif' + with pytest.raises(NotImplementedError) as excinfo: + to_geotiff(_da(arr), str(path), photometric='miniswhite') + msg = str(excinfo.value) + assert '2278' in msg + assert 'MinIsWhite' in msg + assert 'SampleFormat' in msg + assert 'BitsPerSample' in msg + assert 'int16' in msg def test_uint16_miniswhite_with_in_range_nodata_round_trip(tmp_path): diff --git a/xrspatial/geotiff/tests/test_signed_miniswhite_rejected_2278.py b/xrspatial/geotiff/tests/test_signed_miniswhite_rejected_2278.py new file mode 100644 index 00000000..0c77ae94 --- /dev/null +++ b/xrspatial/geotiff/tests/test_signed_miniswhite_rejected_2278.py @@ -0,0 +1,216 @@ +"""Issue #2278: signed-integer MinIsWhite is rejected on both paths. + +Before #2278 the reader (``_decode._apply_photometric_miniswhite``) and +writer (``_encode._apply_photometric_miniswhite_invert``) silently +passed signed-int pixels through unchanged for ``Photometric=0``. The +result round-tripped inside xrspatial but produced files whose pixel +values disagreed with the on-disk ``Photometric`` tag against every +other TIFF consumer (GDAL, libtiff, ImageMagick). + +Both paths now raise ``NotImplementedError`` with a message that lists +SampleFormat / BitsPerSample / Photometric so callers can diagnose the +rejection without digging into the TIFF spec. The existing unsigned and +float MinIsWhite paths are left intact and still round-trip. +""" +from __future__ import annotations + +import struct + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff +from xrspatial.geotiff._header import ( + TAG_PHOTOMETRIC, + TAG_SAMPLE_FORMAT, + parse_header, +) + + +def _da(arr: np.ndarray, attrs_extra=None) -> xr.DataArray: + """Wrap *arr* as an ``open_geotiff``-compatible DataArray. + + Square 1xN strips are common in the existing miniswhite tests so we + keep the same degenerate-axis opt-in as + ``test_miniswhite_writer_roundtrip_1836.py`` (issue #2214). + """ + h, w = arr.shape + attrs = {'res': (1.0, 1.0)} + if h == 1 or w == 1: + attrs['assume_square_pixels_for_degenerate_axis'] = True + if attrs_extra: + attrs.update(attrs_extra) + return xr.DataArray( + arr, + dims=('y', 'x'), + coords={'y': np.arange(h, dtype=np.float64), + 'x': np.arange(w, dtype=np.float64)}, + attrs=attrs, + ) + + +def _patch_tag_value(data: bytearray, tag: int, value: int) -> None: + """Rewrite the inline value slot of *tag* in the first IFD. + + Both ``TAG_PHOTOMETRIC`` (262) and ``TAG_SAMPLE_FORMAT`` (339) are + written as a single SHORT (type 3, count 1) by the xrspatial writer, + so the on-disk value lives in the IFD entry's first two bytes of the + 8-byte value slot. This helper avoids the type/length checks in + ``_tiff_surgery.patch_byte_counts`` for that simpler case. + """ + header = parse_header(bytes(data)) + bo = header.byte_order + ifd_offset = header.first_ifd_offset + num_entries = struct.unpack_from(f"{bo}H", data, ifd_offset)[0] + entry_offset = ifd_offset + 2 + for i in range(num_entries): + eo = entry_offset + i * 12 + cur_tag = struct.unpack_from(f"{bo}H", data, eo)[0] + if cur_tag != tag: + continue + type_id = struct.unpack_from(f"{bo}H", data, eo + 2)[0] + count = struct.unpack_from(f"{bo}I", data, eo + 4)[0] + assert type_id == 3 and count == 1, ( + f"tag {tag} not the expected SHORT/count=1 layout: " + f"type={type_id} count={count}" + ) + struct.pack_into(f"{bo}H", data, eo + 8, value) + return + raise AssertionError(f"tag {tag} not found in IFD") + + +# --------------------------------------------------------------------------- +# Write path +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize( + "dtype,expected_bps", + [(np.int8, 8), (np.int16, 16), (np.int32, 32), (np.int64, 64)], +) +def test_write_signed_miniswhite_rejected_2278(tmp_path, dtype, expected_bps): + """Every signed dtype is refused at write time with a diagnostic + message that lists SampleFormat, BitsPerSample, and Photometric.""" + info = np.iinfo(dtype) + arr = np.array([[info.min, -1, 0, 1, info.max]], dtype=dtype) + path = tmp_path / f'i{expected_bps}_msw_2278.tif' + with pytest.raises(NotImplementedError) as excinfo: + to_geotiff(_da(arr), str(path), photometric='miniswhite') + msg = str(excinfo.value) + assert '2278' in msg, msg + assert 'MinIsWhite' in msg, msg + assert 'SampleFormat=2' in msg, msg + assert f'BitsPerSample={expected_bps}' in msg, msg + assert str(np.dtype(dtype)) in msg, msg + + +def test_write_signed_miniswhite_does_not_partial_write_2278(tmp_path): + """The rejection fires before any bytes are written, so a failed + write does not leave a half-finished file on disk.""" + arr = np.array([[-5, -1, 0, 1, 5]], dtype=np.int16) + path = tmp_path / 'i16_msw_2278_no_partial.tif' + with pytest.raises(NotImplementedError): + to_geotiff(_da(arr), str(path), photometric='miniswhite') + assert not path.exists(), ( + f"writer should not leave a partial file on a pre-write " + f"rejection, but {path} exists" + ) + + +# --------------------------------------------------------------------------- +# Read path +# --------------------------------------------------------------------------- + +def _forge_signed_miniswhite_tif(tmp_path, name: str) -> str: + """Write a normal int16 MinIsBlack file then flip its Photometric + tag from 1 (MinIsBlack) to 0 (MinIsWhite) in place. The resulting + file is exactly what the issue describes: SampleFormat=2 (signed), + Photometric=0 (MinIsWhite), which xrspatial used to read back + silently as the un-inverted MinIsBlack pixels.""" + arr = np.array([[-5, -1, 0, 1, 5]], dtype=np.int16) + path = tmp_path / name + # Default photometric='auto' is MinIsBlack (1) for single-band data, + # so the writer accepts it and emits a normal signed-int TIFF. + to_geotiff(_da(arr), str(path)) + raw = bytearray(path.read_bytes()) + _patch_tag_value(raw, TAG_PHOTOMETRIC, 0) + # SampleFormat is already 2 for int16, but assert via the patch + # helper so the test would fail loud if the writer ever changes. + header = parse_header(bytes(raw)) + bo = header.byte_order + ifd_offset = header.first_ifd_offset + num_entries = struct.unpack_from(f"{bo}H", raw, ifd_offset)[0] + found_sf = None + for i in range(num_entries): + eo = ifd_offset + 2 + i * 12 + cur_tag = struct.unpack_from(f"{bo}H", raw, eo)[0] + if cur_tag == TAG_SAMPLE_FORMAT: + found_sf = struct.unpack_from(f"{bo}H", raw, eo + 8)[0] + break + assert found_sf == 2, ( + f"expected forged file to already declare SampleFormat=2 (signed), " + f"got {found_sf}" + ) + path.write_bytes(bytes(raw)) + return str(path) + + +def test_read_signed_miniswhite_rejected_2278(tmp_path): + """Reading a forged signed-int MinIsWhite TIFF now raises with the + SampleFormat / BitsPerSample / Photometric values in the message + instead of returning silently-wrong pixels.""" + path = _forge_signed_miniswhite_tif(tmp_path, 'i16_msw_read_2278.tif') + with pytest.raises(NotImplementedError) as excinfo: + open_geotiff(path) + msg = str(excinfo.value) + assert '2278' in msg, msg + assert 'MinIsWhite' in msg, msg + assert 'SampleFormat=2' in msg, msg + assert 'BitsPerSample=16' in msg, msg + assert 'int16' in msg, msg + + +# --------------------------------------------------------------------------- +# Non-regression: unsigned and float MinIsWhite still round-trip +# --------------------------------------------------------------------------- + +def test_unsigned_miniswhite_still_round_trips_2278(tmp_path): + """The unsigned path is unaffected by the signed rejection.""" + arr = np.array([[0, 1, 127, 254, 255]], dtype=np.uint8) + path = tmp_path / 'u8_msw_nonreg_2278.tif' + to_geotiff(_da(arr), str(path), photometric='miniswhite') + out = np.asarray(open_geotiff(str(path)).values) + np.testing.assert_array_equal(out, arr) + + +def test_float_miniswhite_still_round_trips_2278(tmp_path): + """The float path is unaffected by the signed rejection.""" + arr = np.array([[-3.5, 0.0, 0.25, 7.5]], dtype=np.float32) + path = tmp_path / 'f32_msw_nonreg_2278.tif' + to_geotiff(_da(arr), str(path), photometric='miniswhite') + out = np.asarray(open_geotiff(str(path)).values) + np.testing.assert_allclose(out, arr) + + +# --------------------------------------------------------------------------- +# GPU read path: same rejection so CPU and GPU stay in sync +# --------------------------------------------------------------------------- + +def test_read_signed_miniswhite_rejected_on_gpu_path_2278(tmp_path): + """The pure-GPU decode path mirrors the CPU rejection (#2278 review + follow-up). Without it, ``open_geotiff(..., gpu=True)`` on a signed + MinIsWhite file would silently return un-inverted pixels while the + CPU path raises, breaking backend parity. + """ + cupy = pytest.importorskip("cupy") + try: + if cupy.cuda.runtime.getDeviceCount() < 1: + pytest.skip("no CUDA device available") + except Exception as exc: # pragma: no cover - CI without CUDA + pytest.skip(f"cupy importable but CUDA unusable: {exc}") + path = _forge_signed_miniswhite_tif(tmp_path, 'i16_msw_gpu_2278.tif') + with pytest.raises(NotImplementedError) as excinfo: + open_geotiff(path, gpu=True) + msg = str(excinfo.value) + assert '2278' in msg, msg + assert 'MinIsWhite' in msg, msg