Skip to content

Commit 4666bf8

Browse files
committed
geotiff: per-tile dim check uses default cap, not caller budget (#1823)
PR #1803 forwarded the caller's max_pixels to read_to_array inside read_vrt's source loop so a tiny VRT output cannot force a huge source decode (#1796). The output-window check at the source read enforces that correctly. A separate per-tile dimension check at the same call sites also consumed the caller's max_pixels, so a caller setting max_pixels as an output budget (e.g. 10_000) failed the per-tile sanity check on any normal source whose default tile size is 256x256 (= 65_536 pixels). Use MAX_PIXELS_DEFAULT for the per-tile dim check at the two call sites in _read_tiles (local) and _read_tiles_cog_http (HTTP). The output-window check at the same functions continues to enforce the user-supplied max_pixels, preserving the #1796 protection.
1 parent a58db09 commit 4666bf8

2 files changed

Lines changed: 127 additions & 5 deletions

File tree

xrspatial/geotiff/_reader.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,9 +1560,14 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
15601560
raise ValueError(
15611561
f"Invalid tile dimensions: TileWidth={tw}, TileLength={th}")
15621562

1563-
# Reject crafted tile dims that would force huge per-tile allocations.
1564-
# A single tile's decoded bytes must also fit under the pixel budget.
1565-
_check_dimensions(tw, th, samples, max_pixels)
1563+
# Reject crafted tile dims (e.g. TileWidth = 2**31). This guards the
1564+
# TIFF header against malformed values; it is not the caller's output
1565+
# budget. The output-window check below uses ``max_pixels`` and is
1566+
# what enforces the user's per-call memory cap. The source-read path
1567+
# under ``read_vrt`` (#1796) relies on that output check to honour a
1568+
# small caller ``max_pixels`` against a normal-tile source; see
1569+
# #1823.
1570+
_check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT)
15661571

15671572
# Per-tile compressed-byte cap (issue #1664). Same env var as the
15681573
# HTTP path. mmap slicing is bounded by the file size, but the slice
@@ -2016,10 +2021,14 @@ def _fetch_decode_cog_http_tiles(
20162021
# A windowed HTTP read of a multi-billion-pixel COG only allocates
20172022
# the window, so capping the full image would reject legitimate
20182023
# tiled reads. The full-image cap still applies for whole-file
2019-
# reads (window is None). The single-tile budget always applies.
2024+
# reads (window is None). The per-tile dim check below guards the
2025+
# TIFF header against absurd ``TileWidth`` / ``TileLength`` values
2026+
# (e.g. 2**31) and uses ``MAX_PIXELS_DEFAULT`` so a caller's small
2027+
# ``max_pixels`` -- intended as an output-window budget -- does not
2028+
# reject normal 256x256 tiles. See #1823.
20202029
if window is None:
20212030
_check_dimensions(width, height, samples, max_pixels)
2022-
_check_dimensions(tw, th, samples, max_pixels)
2031+
_check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT)
20232032

20242033
# Reject malformed TIFFs whose declared tile grid exceeds the supplied
20252034
# TileOffsets length. See issue #1219.
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""Regression tests for #1823.
2+
3+
PR #1803 forwarded the caller's ``max_pixels`` to ``read_to_array`` inside
4+
the VRT source loop so that a tiny VRT output could not force a huge
5+
source decode (#1796). The output-window check enforces that. A separate
6+
per-tile dimension check was incorrectly using the same ``max_pixels``
7+
value, so a caller setting ``max_pixels`` as an output budget (e.g.
8+
10,000) would also fail the per-tile sanity check on every normal source
9+
whose default tile size is 256x256 (= 65,536 pixels).
10+
11+
The #1796 protection remains: the output-window check still catches a
12+
tiny VRT output that asks for a large source window.
13+
"""
14+
from __future__ import annotations
15+
16+
import os
17+
import tempfile
18+
19+
import numpy as np
20+
import pytest
21+
22+
from xrspatial.geotiff import to_geotiff
23+
from xrspatial.geotiff._reader import PixelSafetyLimitError
24+
from xrspatial.geotiff._vrt import read_vrt
25+
26+
27+
def _write_normal_tile_source(td: str) -> str:
28+
"""10x10 uint8 source -- ``to_geotiff`` pads to a 256x256 tile."""
29+
src = os.path.join(td, 'src.tif')
30+
to_geotiff(np.zeros((10, 10), dtype=np.uint8), src, compression='none')
31+
return src
32+
33+
34+
def _write_vrt(td: str, *, dst_x_size: int, dst_y_size: int,
35+
raster_x: int = 100, raster_y: int = 100,
36+
src_x_size: int = 10, src_y_size: int = 10) -> str:
37+
vrt = os.path.join(td, 'mosaic.vrt')
38+
xml = (
39+
f'<VRTDataset rasterXSize="{raster_x}" rasterYSize="{raster_y}">\n'
40+
f' <VRTRasterBand dataType="Byte" band="1">\n'
41+
f' <SimpleSource>\n'
42+
f' <SourceFilename relativeToVRT="1">src.tif</SourceFilename>\n'
43+
f' <SourceBand>1</SourceBand>\n'
44+
f' <SrcRect xOff="0" yOff="0" '
45+
f'xSize="{src_x_size}" ySize="{src_y_size}"/>\n'
46+
f' <DstRect xOff="0" yOff="0" '
47+
f'xSize="{dst_x_size}" ySize="{dst_y_size}"/>\n'
48+
f' </SimpleSource>\n'
49+
f' </VRTRasterBand>\n'
50+
f'</VRTDataset>\n'
51+
)
52+
with open(vrt, 'w') as f:
53+
f.write(xml)
54+
return vrt
55+
56+
57+
class TestPerTileCheckDoesNotUseCallerBudget:
58+
"""Per-tile dim sanity must not reject normal 256x256 source tiles
59+
when the caller's ``max_pixels`` is a small output-budget value."""
60+
61+
def test_normal_tile_source_with_small_max_pixels(self):
62+
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
63+
_write_normal_tile_source(td)
64+
vrt = _write_vrt(td, dst_x_size=100, dst_y_size=100)
65+
arr, _ = read_vrt(vrt, max_pixels=10_000)
66+
assert arr.shape == (100, 100)
67+
68+
def test_normal_tile_source_with_tiny_max_pixels(self):
69+
"""An output budget below a single tile must still succeed when
70+
the requested output window itself fits."""
71+
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
72+
_write_normal_tile_source(td)
73+
# Output is 5x5 = 25 pixels: under max_pixels=100, which is itself far below one 256x256 tile.
74+
vrt = _write_vrt(td, dst_x_size=5, dst_y_size=5,
75+
raster_x=5, raster_y=5)
76+
arr, _ = read_vrt(vrt, max_pixels=100)
77+
assert arr.shape == (5, 5)
78+
79+
80+
class TestOutputWindowCheckStillEnforced:
81+
"""The output-window check at the source read still rejects an
82+
over-budget read; the #1796 protection is preserved."""
83+
84+
def test_output_window_exceeds_max_pixels_still_rejected(self):
85+
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
86+
src = os.path.join(td, 'src.tif')
87+
to_geotiff(np.arange(16, dtype=np.uint8).reshape(4, 4),
88+
src, compression='none')
89+
vrt = _write_vrt(td, dst_x_size=1, dst_y_size=1,
90+
raster_x=1, raster_y=1,
91+
src_x_size=4, src_y_size=4)
92+
# SrcRect 4x4 = 16 pixels > max_pixels=1 → the source read's output-window check fires.
93+
with pytest.raises(ValueError, match="exceed"):
94+
read_vrt(vrt, max_pixels=1)
95+
96+
97+
class TestPerTileCheckStillRejectsCraftedHeader:
98+
"""A pathological ``TileWidth``/``TileLength`` must still fail at
99+
the per-tile sanity check, which uses ``MAX_PIXELS_DEFAULT``."""
100+
101+
def test_per_tile_check_caps_at_default(self, monkeypatch):
102+
"""Lower ``MAX_PIXELS_DEFAULT`` to verify the per-tile call site
103+
is wired to it (rather than to the caller's budget)."""
104+
from xrspatial.geotiff import _reader as reader_mod
105+
106+
monkeypatch.setattr(reader_mod, "MAX_PIXELS_DEFAULT", 100)
107+
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
108+
_write_normal_tile_source(td)
109+
vrt = _write_vrt(td, dst_x_size=100, dst_y_size=100)
110+
# 256x256 tile > patched MAX_PIXELS_DEFAULT=100 → per-tile
111+
# check fires regardless of caller's max_pixels (1e9 here).
112+
with pytest.raises(PixelSafetyLimitError, match="65,536"):
113+
read_vrt(vrt, max_pixels=1_000_000_000)

0 commit comments

Comments
 (0)