Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions xrspatial/geotiff/_backends/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,24 @@ def _read_once():
arr_gpu = np.iinfo(gpu_dtype).max - arr_gpu
elif gpu_dtype.kind == 'f':
arr_gpu = -arr_gpu
elif gpu_dtype.kind == 'i':
# Mirror the CPU reader's signed-int MinIsWhite
# rejection (issue #2278). Without this the pure-GPU
# decode path would silently return un-inverted
# signed pixels while the CPU decode path raises,
# breaking backend parity.
raise NotImplementedError(
f"Signed-integer MinIsWhite TIFFs are not "
f"supported on the GPU decode path either "
f"(issue #2278): Photometric=0 (MinIsWhite), "
f"SampleFormat={ifd.sample_format} (signed int), "
f"BitsPerSample={ifd.bits_per_sample}, "
f"dtype={gpu_dtype}. xrspatial has no "
f"semantically correct inversion for signed "
f"pixels here. Convert the file to MinIsBlack "
f"(Photometric=1) with another tool, or open it "
f"in an unsigned dtype."
)

if name is None:
import os
Expand Down
22 changes: 21 additions & 1 deletion xrspatial/geotiff/_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,13 +868,33 @@ def _apply_orientation_with_geo(


def _apply_photometric_miniswhite(arr: np.ndarray, ifd: IFD) -> np.ndarray:
"""Apply TIFF MinIsWhite inversion for single-band grayscale images."""
"""Apply TIFF MinIsWhite inversion for single-band grayscale images.

Signed-integer single-band MinIsWhite is rejected (issue #2278). The
reader used to pass these through unchanged, which round-tripped
inside xrspatial but produced files whose pixel values disagreed
with the on-disk Photometric tag against every other TIFF consumer
(GDAL, libtiff, ImageMagick).
"""
if ifd.photometric != 0 or ifd.samples_per_pixel != 1:
return arr
if arr.dtype.kind == 'u':
return np.iinfo(arr.dtype).max - arr
if arr.dtype.kind == 'f':
return -arr
if arr.dtype.kind == 'i':
raise NotImplementedError(
f"Signed-integer MinIsWhite TIFFs are not supported "
f"(issue #2278): Photometric=0 (MinIsWhite), "
f"SampleFormat={ifd.sample_format} (signed int), "
f"BitsPerSample={ifd.bits_per_sample}, dtype={arr.dtype}. "
f"The reader has no semantically correct inversion for "
f"signed pixels here, and passing them through unchanged "
f"would disagree with the on-disk Photometric tag against "
f"every standards-compliant TIFF reader (GDAL, libtiff, "
f"etc.). Convert the file to MinIsBlack (Photometric=1) "
f"with another tool, or open it in an unsigned dtype."
)
return arr


Expand Down
25 changes: 23 additions & 2 deletions xrspatial/geotiff/_encode.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,15 +131,36 @@ def _apply_photometric_miniswhite_invert(
See issue #1836.

Returns the pre-inverted array (a new array) so that the reader's
inversion restores the original values. Multi-band data and signed
integer data pass through unchanged, matching the reader.
inversion restores the original values. Multi-band data passes
through unchanged. Signed-integer single-band MinIsWhite raises
``NotImplementedError`` -- the previous passthrough produced files
whose pixel values disagreed with the on-disk Photometric tag
against every standards-compliant TIFF consumer (GDAL, libtiff).
See issue #2278.
"""
if resolved_photometric != 0 or samples_per_pixel != 1:
return arr
if arr.dtype.kind == 'u':
return np.iinfo(arr.dtype).max - arr
if arr.dtype.kind == 'f':
return -arr
if arr.dtype.kind == 'i':
# Defer the import to dodge any module-load cycle through
# ``_dtypes``.
from ._dtypes import numpy_to_tiff_dtype
bps, sf = numpy_to_tiff_dtype(arr.dtype)
raise NotImplementedError(
f"Writing signed-integer MinIsWhite TIFFs is not supported "
f"(issue #2278): Photometric=0 (MinIsWhite), "
f"SampleFormat={sf} (signed int), BitsPerSample={bps}, "
f"dtype={arr.dtype}. xrspatial has no semantically correct "
f"inversion for signed pixels here, and writing them "
f"un-inverted would produce a file whose pixels disagree "
f"with the Photometric tag against every standards-"
f"compliant TIFF reader (GDAL, libtiff, etc.). Cast to an "
f"unsigned dtype, or write with photometric='minisblack' "
f"/ 'auto'."
)
return arr


Expand Down
9 changes: 8 additions & 1 deletion xrspatial/geotiff/_writers/eager.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,14 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
* ``'rgba'`` -- RGB with the 4th band tagged as unassociated
alpha (TIFF ExtraSamples=2). Requires at least 4 bands.
* ``'minisblack'`` or ``'miniswhite'`` -- grayscale; multi-band
extras tagged ``0``.
extras tagged ``0``. Signed-integer pixel types with
``'miniswhite'`` are rejected with ``NotImplementedError`` --
xrspatial has no semantically correct inversion for signed
MinIsWhite and the silent passthrough that used to happen
produced files that disagreed with the on-disk Photometric
tag against every standards-compliant TIFF reader (issue
#2278). Cast to an unsigned dtype or pass
``photometric='minisblack'``.
* An ``int`` -- written verbatim into Photometric for advanced
callers (e.g. ``3`` for Palette, ``5`` for CMYK).

Expand Down
27 changes: 19 additions & 8 deletions xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,26 @@ def test_miniswhite_with_nodata_round_trip(tmp_path):
np.testing.assert_allclose(out[0, [0, 2, 3]], arr[0, [0, 2, 3]])


def test_miniswhite_int_passthrough(tmp_path):
"""The reader does not invert signed integer MinIsWhite data, so the
writer must also pass it through unchanged. Otherwise the round-trip
would silently corrupt signed data."""
def test_miniswhite_int_rejected_at_write_2278(tmp_path):
"""Issue #2278: signed-integer MinIsWhite is rejected at write time.

The writer used to leave signed pixels untouched, which round-tripped
inside xrspatial but produced a file whose pixel values disagreed
with the on-disk Photometric tag against every other TIFF consumer
(GDAL, libtiff, ImageMagick). The writer now refuses the combination
outright with a message that includes SampleFormat / BitsPerSample /
Photometric so callers can diagnose without digging into the spec.
"""
arr = np.array([[-5, -1, 0, 1, 5]], dtype=np.int16)
path = tmp_path / 'i16_msw_1836.tif'
to_geotiff(_da(arr), str(path), photometric='miniswhite')
r = open_geotiff(str(path))
np.testing.assert_array_equal(np.asarray(r.values), arr)
path = tmp_path / 'i16_msw_2278.tif'
with pytest.raises(NotImplementedError) as excinfo:
to_geotiff(_da(arr), str(path), photometric='miniswhite')
msg = str(excinfo.value)
assert '2278' in msg
assert 'MinIsWhite' in msg
assert 'SampleFormat' in msg
assert 'BitsPerSample' in msg
assert 'int16' in msg


def test_uint16_miniswhite_with_in_range_nodata_round_trip(tmp_path):
Expand Down
216 changes: 216 additions & 0 deletions xrspatial/geotiff/tests/test_signed_miniswhite_rejected_2278.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
"""Issue #2278: signed-integer MinIsWhite is rejected on both paths.

Before #2278 the reader (``_decode._apply_photometric_miniswhite``) and
writer (``_encode._apply_photometric_miniswhite_invert``) silently
passed signed-int pixels through unchanged for ``Photometric=0``. The
result round-tripped inside xrspatial but produced files whose pixel
values disagreed with the on-disk ``Photometric`` tag against every
other TIFF consumer (GDAL, libtiff, ImageMagick).

Both paths now raise ``NotImplementedError`` with a message that lists
SampleFormat / BitsPerSample / Photometric so callers can diagnose the
rejection without digging into the TIFF spec. The existing unsigned and
float MinIsWhite paths are left intact and still round-trip.
"""
from __future__ import annotations

import struct

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import open_geotiff, to_geotiff
from xrspatial.geotiff._header import (
TAG_PHOTOMETRIC,
TAG_SAMPLE_FORMAT,
parse_header,
)


def _da(arr: np.ndarray, attrs_extra=None) -> xr.DataArray:
"""Wrap *arr* as an ``open_geotiff``-compatible DataArray.

Square 1xN strips are common in the existing miniswhite tests so we
keep the same degenerate-axis opt-in as
``test_miniswhite_writer_roundtrip_1836.py`` (issue #2214).
"""
h, w = arr.shape
attrs = {'res': (1.0, 1.0)}
if h == 1 or w == 1:
attrs['assume_square_pixels_for_degenerate_axis'] = True
if attrs_extra:
attrs.update(attrs_extra)
return xr.DataArray(
arr,
dims=('y', 'x'),
coords={'y': np.arange(h, dtype=np.float64),
'x': np.arange(w, dtype=np.float64)},
attrs=attrs,
)


def _patch_tag_value(data: bytearray, tag: int, value: int) -> None:
"""Rewrite the inline value slot of *tag* in the first IFD.

Both ``TAG_PHOTOMETRIC`` (262) and ``TAG_SAMPLE_FORMAT`` (339) are
written as a single SHORT (type 3, count 1) by the xrspatial writer,
so the on-disk value lives in the IFD entry's first two bytes of the
8-byte value slot. This helper avoids the type/length checks in
``_tiff_surgery.patch_byte_counts`` for that simpler case.
"""
header = parse_header(bytes(data))
bo = header.byte_order
ifd_offset = header.first_ifd_offset
num_entries = struct.unpack_from(f"{bo}H", data, ifd_offset)[0]
entry_offset = ifd_offset + 2
for i in range(num_entries):
eo = entry_offset + i * 12
cur_tag = struct.unpack_from(f"{bo}H", data, eo)[0]
if cur_tag != tag:
continue
type_id = struct.unpack_from(f"{bo}H", data, eo + 2)[0]
count = struct.unpack_from(f"{bo}I", data, eo + 4)[0]
assert type_id == 3 and count == 1, (
f"tag {tag} not the expected SHORT/count=1 layout: "
f"type={type_id} count={count}"
)
struct.pack_into(f"{bo}H", data, eo + 8, value)
return
raise AssertionError(f"tag {tag} not found in IFD")


# ---------------------------------------------------------------------------
# Write path
# ---------------------------------------------------------------------------

@pytest.mark.parametrize(
"dtype,expected_bps",
[(np.int8, 8), (np.int16, 16), (np.int32, 32), (np.int64, 64)],
)
def test_write_signed_miniswhite_rejected_2278(tmp_path, dtype, expected_bps):
"""Every signed dtype is refused at write time with a diagnostic
message that lists SampleFormat, BitsPerSample, and Photometric."""
info = np.iinfo(dtype)
arr = np.array([[info.min, -1, 0, 1, info.max]], dtype=dtype)
path = tmp_path / f'i{expected_bps}_msw_2278.tif'
with pytest.raises(NotImplementedError) as excinfo:
to_geotiff(_da(arr), str(path), photometric='miniswhite')
msg = str(excinfo.value)
assert '2278' in msg, msg
assert 'MinIsWhite' in msg, msg
assert 'SampleFormat=2' in msg, msg
assert f'BitsPerSample={expected_bps}' in msg, msg
assert str(np.dtype(dtype)) in msg, msg


def test_write_signed_miniswhite_does_not_partial_write_2278(tmp_path):
"""The rejection fires before any bytes are written, so a failed
write does not leave a half-finished file on disk."""
arr = np.array([[-5, -1, 0, 1, 5]], dtype=np.int16)
path = tmp_path / 'i16_msw_2278_no_partial.tif'
with pytest.raises(NotImplementedError):
to_geotiff(_da(arr), str(path), photometric='miniswhite')
assert not path.exists(), (
f"writer should not leave a partial file on a pre-write "
f"rejection, but {path} exists"
)


# ---------------------------------------------------------------------------
# Read path
# ---------------------------------------------------------------------------

def _forge_signed_miniswhite_tif(tmp_path, name: str) -> str:
"""Write a normal int16 MinIsBlack file then flip its Photometric
tag from 1 (MinIsBlack) to 0 (MinIsWhite) in place. The resulting
file is exactly what the issue describes: SampleFormat=2 (signed),
Photometric=0 (MinIsWhite), which xrspatial used to read back
silently as the un-inverted MinIsBlack pixels."""
arr = np.array([[-5, -1, 0, 1, 5]], dtype=np.int16)
path = tmp_path / name
# Default photometric='auto' is MinIsBlack (1) for single-band data,
# so the writer accepts it and emits a normal signed-int TIFF.
to_geotiff(_da(arr), str(path))
raw = bytearray(path.read_bytes())
_patch_tag_value(raw, TAG_PHOTOMETRIC, 0)
# SampleFormat is already 2 for int16, but assert via the patch
# helper so the test would fail loud if the writer ever changes.
header = parse_header(bytes(raw))
bo = header.byte_order
ifd_offset = header.first_ifd_offset
num_entries = struct.unpack_from(f"{bo}H", raw, ifd_offset)[0]
found_sf = None
for i in range(num_entries):
eo = ifd_offset + 2 + i * 12
cur_tag = struct.unpack_from(f"{bo}H", raw, eo)[0]
if cur_tag == TAG_SAMPLE_FORMAT:
found_sf = struct.unpack_from(f"{bo}H", raw, eo + 8)[0]
break
assert found_sf == 2, (
f"expected forged file to already declare SampleFormat=2 (signed), "
f"got {found_sf}"
)
path.write_bytes(bytes(raw))
return str(path)


def test_read_signed_miniswhite_rejected_2278(tmp_path):
"""Reading a forged signed-int MinIsWhite TIFF now raises with the
SampleFormat / BitsPerSample / Photometric values in the message
instead of returning silently-wrong pixels."""
path = _forge_signed_miniswhite_tif(tmp_path, 'i16_msw_read_2278.tif')
with pytest.raises(NotImplementedError) as excinfo:
open_geotiff(path)
msg = str(excinfo.value)
assert '2278' in msg, msg
assert 'MinIsWhite' in msg, msg
assert 'SampleFormat=2' in msg, msg
assert 'BitsPerSample=16' in msg, msg
assert 'int16' in msg, msg


# ---------------------------------------------------------------------------
# Non-regression: unsigned and float MinIsWhite still round-trip
# ---------------------------------------------------------------------------

def test_unsigned_miniswhite_still_round_trips_2278(tmp_path):
"""The unsigned path is unaffected by the signed rejection."""
arr = np.array([[0, 1, 127, 254, 255]], dtype=np.uint8)
path = tmp_path / 'u8_msw_nonreg_2278.tif'
to_geotiff(_da(arr), str(path), photometric='miniswhite')
out = np.asarray(open_geotiff(str(path)).values)
np.testing.assert_array_equal(out, arr)


def test_float_miniswhite_still_round_trips_2278(tmp_path):
"""The float path is unaffected by the signed rejection."""
arr = np.array([[-3.5, 0.0, 0.25, 7.5]], dtype=np.float32)
path = tmp_path / 'f32_msw_nonreg_2278.tif'
to_geotiff(_da(arr), str(path), photometric='miniswhite')
out = np.asarray(open_geotiff(str(path)).values)
np.testing.assert_allclose(out, arr)


# ---------------------------------------------------------------------------
# GPU read path: same rejection so CPU and GPU stay in sync
# ---------------------------------------------------------------------------

def test_read_signed_miniswhite_rejected_on_gpu_path_2278(tmp_path):
"""The pure-GPU decode path mirrors the CPU rejection (#2278 review
follow-up). Without it, ``open_geotiff(..., gpu=True)`` on a signed
MinIsWhite file would silently return un-inverted pixels while the
CPU path raises, breaking backend parity.
"""
cupy = pytest.importorskip("cupy")
try:
if cupy.cuda.runtime.getDeviceCount() < 1:
pytest.skip("no CUDA device available")
except Exception as exc: # pragma: no cover - CI without CUDA
pytest.skip(f"cupy importable but CUDA unusable: {exc}")
path = _forge_signed_miniswhite_tif(tmp_path, 'i16_msw_gpu_2278.tif')
with pytest.raises(NotImplementedError) as excinfo:
open_geotiff(path, gpu=True)
msg = str(excinfo.value)
assert '2278' in msg, msg
assert 'MinIsWhite' in msg, msg
Loading