Skip to content

Commit 86dcd8c

Browse files
authored
Fix COG overview block ordering (#2308) (#2309)
1 parent f5fbad5 commit 86dcd8c

2 files changed

Lines changed: 177 additions & 6 deletions

File tree

xrspatial/geotiff/_write_layout.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -300,11 +300,36 @@ def _assemble_cog_layout(header_size: int,
300300
total_ifd_size = sum(bs + ov for bs, ov in ifd_blocks)
301301
pixel_data_start = header_size + total_ifd_size
302302

303-
# Second pass: pixel data offsets per level
303+
# COG block-order requirement (issue #2308): on-disk pixel data must
304+
# run smallest-overview first, then progressively larger overviews,
305+
# with the main-resolution image's blocks last. ``pixel_data_parts``
306+
# arrives in ``[main, ov_factor_2, ov_factor_4, ...]`` order (full
307+
# res first, then overviews of increasing decimation), so the
308+
# emission order is the overview tail reversed (smallest factor =
309+
# largest decimation = last entry, emitted first) followed by index
310+
# 0 (main resolution). The IFD chain still walks ``[main, ov1, ov2,
311+
# ...]`` -- only the byte-level placement of the tile/strip blocks
312+
# changes, which is what external validators like rio-cogeo check.
313+
n_parts = len(pixel_data_parts)
314+
if n_parts > 1:
315+
pixel_emission_order = list(range(n_parts - 1, 0, -1)) + [0]
316+
else:
317+
# In normal use, _assemble_tiff routes single-IFD outputs to
318+
# ``_assemble_standard_layout``; the upstream gate
319+
# ``is_cog and len(ifd_specs) > 1`` guarantees n_parts >= 2 here.
320+
# The single-entry fallback keeps the helper safe for direct
321+
# unit tests that exercise the COG layout with one IFD.
322+
pixel_emission_order = [0]
323+
324+
# Second pass: pixel data offsets per level. We walk the emission
325+
# order to accumulate offsets, then store them indexed by the
326+
# original level so the third pass can look up each IFD's tile/strip
327+
# base directly.
328+
level_pixel_offsets = [0] * n_parts
304329
current_pixel_offset = pixel_data_start
305-
level_pixel_offsets = []
306-
for _arr, _lw, _lh, rel_offsets, byte_counts, comp_chunks in pixel_data_parts:
307-
level_pixel_offsets.append(current_pixel_offset)
330+
for emit_idx in pixel_emission_order:
331+
_arr, _lw, _lh, _rel, _bc, comp_chunks = pixel_data_parts[emit_idx]
332+
level_pixel_offsets[emit_idx] = current_pixel_offset
308333
current_pixel_offset += sum(len(c) for c in comp_chunks)
309334

310335
# Third pass: build IFDs with correct offsets
@@ -355,8 +380,11 @@ def _assemble_cog_layout(header_size: int,
355380
output.extend(overflow_bytes)
356381
current_ifd_pos = len(output)
357382

358-
# Append all pixel data
359-
for _arr, _lw, _lh, _rel_offsets, _byte_counts, comp_chunks in pixel_data_parts:
383+
# Append all pixel data in COG-compliant order: smallest overview
384+
# first, then progressively larger, with the main resolution image
385+
# last (issue #2308).
386+
for emit_idx in pixel_emission_order:
387+
_arr, _lw, _lh, _rel, _bc, comp_chunks = pixel_data_parts[emit_idx]
360388
for chunk in comp_chunks:
361389
output.extend(chunk)
362390

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
"""COG overview tile-block ordering invariant (issue #2308).
2+
3+
The COG spec requires the on-disk pixel-data layout to run from the
4+
smallest overview through progressively larger overviews and end with
5+
the main-resolution image. External readers (rio-cogeo, GDAL's
6+
``validate_cloud_optimized_geotiff``) flag a file when the byte
7+
ordering reverses or interleaves these blocks even though the IFD
8+
chain walks main -> ov1 -> ov2 the conventional way. These tests lock
9+
the byte order in as a regression gate so the writer cannot drift back
10+
to the old layout.
11+
"""
12+
from __future__ import annotations
13+
14+
import os
15+
import tempfile
16+
17+
import numpy as np
18+
import pytest
19+
import xarray as xr
20+
21+
from xrspatial.geotiff import to_geotiff
22+
from xrspatial.geotiff._header import parse_all_ifds, parse_header
23+
24+
25+
def _min_block_offset(ifd) -> int:
26+
"""Return the smallest tile-offset (or strip-offset) for an IFD."""
27+
offsets = ifd.tile_offsets
28+
if offsets is None:
29+
offsets = ifd.strip_offsets
30+
assert offsets is not None and len(offsets) > 0, (
31+
"IFD has neither tile_offsets nor strip_offsets")
32+
return min(offsets)
33+
34+
35+
def _read_block_order(path: str) -> list[int]:
36+
"""Return ``min_block_offset`` for each IFD in walk order."""
37+
with open(path, "rb") as f:
38+
data = f.read()
39+
header = parse_header(data)
40+
ifds = parse_all_ifds(data, header)
41+
return [_min_block_offset(ifd) for ifd in ifds]
42+
43+
44+
def _make_da(shape, bands: int | None = None) -> xr.DataArray:
45+
"""Build a synthetic DataArray with a sane CRS / coordinate grid."""
46+
rng = np.random.RandomState(17)
47+
if bands is None:
48+
arr = rng.rand(*shape).astype("float32")
49+
dims = ("y", "x")
50+
else:
51+
arr = rng.rand(bands, *shape).astype("float32")
52+
dims = ("band", "y", "x")
53+
h, w = shape
54+
coords = {
55+
"y": np.linspace(45, 44, h),
56+
"x": np.linspace(-120, -119, w),
57+
}
58+
return xr.DataArray(arr, dims=dims, coords=coords, attrs={"crs": 4326})
59+
60+
61+
@pytest.mark.parametrize("bands", [None, 3])
62+
def test_cog_overview_block_order_invariant_2308(bands):
63+
"""Pixel blocks must run smallest-overview -> larger -> main.
64+
65+
The IFD walk order is ``[main, ov_factor_2, ov_factor_4]`` (full
66+
resolution first). The on-disk pixel-block order must be the
67+
reverse: factor-4 overview blocks first, then factor-2 overview
68+
blocks, with the main-resolution blocks last (issue #2308).
69+
"""
70+
da = _make_da((256, 256), bands=bands)
71+
with tempfile.TemporaryDirectory() as td:
72+
suffix = "rgb" if bands else "mono"
73+
path = os.path.join(td, f"order_2308_{suffix}.tif")
74+
to_geotiff(
75+
da, path, compression="deflate", cog=True,
76+
tile_size=64, overview_levels=[2, 4],
77+
)
78+
79+
block_offsets = _read_block_order(path)
80+
# IFD walk: [main, ov_factor_2, ov_factor_4]
81+
main_min, ov2_min, ov4_min = block_offsets
82+
# COG layout: factor-4 (smallest overview) -> factor-2 -> main.
83+
assert ov4_min < ov2_min, (
84+
f"smallest overview blocks should sit before larger "
85+
f"overview blocks: ov4_min={ov4_min}, ov2_min={ov2_min}")
86+
assert ov2_min < main_min, (
87+
f"overview blocks should sit before main-resolution "
88+
f"blocks: ov2_min={ov2_min}, main_min={main_min}")
89+
90+
91+
def test_cog_overview_block_order_three_levels_2308():
92+
"""Same invariant with three overview levels (factor 2/4/8)."""
93+
da = _make_da((512, 512))
94+
with tempfile.TemporaryDirectory() as td:
95+
path = os.path.join(td, "order_2308_three.tif")
96+
to_geotiff(
97+
da, path, compression="deflate", cog=True,
98+
tile_size=64, overview_levels=[2, 4, 8],
99+
)
100+
101+
block_offsets = _read_block_order(path)
102+
# IFD walk: [main, ov2, ov4, ov8]
103+
main_min, ov2_min, ov4_min, ov8_min = block_offsets
104+
# On-disk: ov8 -> ov4 -> ov2 -> main
105+
assert ov8_min < ov4_min < ov2_min < main_min, (
106+
f"COG block order broken: ov8={ov8_min} ov4={ov4_min} "
107+
f"ov2={ov2_min} main={main_min}")
108+
109+
110+
def _rio_cogeo_or_skip():
111+
"""Skip the rio-cogeo gate when the dependency isn't installed.
112+
113+
Mirrors the skip semantics used in ``test_cog_writer_compliance``:
114+
contributor laptops without rio-cogeo see a skip, CI with rio-cogeo
115+
runs the strict check.
116+
"""
117+
try:
118+
from rio_cogeo.cogeo import cog_validate
119+
except ImportError:
120+
pytest.skip("rio-cogeo not installed")
121+
return cog_validate
122+
123+
124+
@pytest.mark.parametrize("bands", [None, 3])
125+
def test_cog_overview_block_order_rio_cogeo_2308(bands):
126+
"""``rio-cogeo cog_validate`` returns valid=True with no block-order errors."""
127+
cog_validate = _rio_cogeo_or_skip()
128+
da = _make_da((256, 256), bands=bands)
129+
with tempfile.TemporaryDirectory() as td:
130+
suffix = "rgb" if bands else "mono"
131+
path = os.path.join(td, f"order_2308_rio_{suffix}.tif")
132+
to_geotiff(
133+
da, path, compression="deflate", cog=True,
134+
tile_size=64, overview_levels=[2, 4],
135+
)
136+
valid, errors, _warnings = cog_validate(path, strict=False)
137+
assert valid, f"rio_cogeo cog_validate failed: {errors}"
138+
# Defensive secondary assertion: the two block-order messages
139+
# from #2308 must not reappear even if some future writer
140+
# change keeps the validator happy on the headline check.
141+
joined = " ".join(errors).lower()
142+
assert "offset of the first block" not in joined, (
143+
f"block-order errors regressed: {errors}")

0 commit comments

Comments
 (0)