diff --git a/docs/source/reference/geotiff.rst b/docs/source/reference/geotiff.rst index d0c0bb90..bdae376a 100644 --- a/docs/source/reference/geotiff.rst +++ b/docs/source/reference/geotiff.rst @@ -129,9 +129,18 @@ VRT missing sources =================== ``read_vrt`` accepts ``missing_sources='warn'`` or ``'raise'``. The default -``'warn'`` preserves the historical behavior: unreadable source files emit -:class:`xrspatial.geotiff.GeoTIFFFallbackWarning`, the returned DataArray -contains ``attrs['vrt_holes']``, and the mosaic is returned with holes. -Use ``missing_sources='raise'`` when a partial mosaic should fail the -pipeline immediately. ``XRSPATIAL_GEOTIFF_STRICT=1`` still raises in -``'warn'`` mode so CI environments can enforce fail-fast behavior globally. +``'raise'`` (since #1860) fails the read immediately if any source file +referenced by the VRT does not exist on disk. Both the eager and chunked +dispatchers honour this at construction time -- chunked callers do not +have to wait until ``compute()`` to learn the VRT is broken (#2265). +The static missing-source sweep is scoped to the requested ``window=`` +and ``band=`` so a windowed or band-restricted read that does not depend +on a missing source still succeeds. + +Pass ``missing_sources='warn'`` to opt into the lenient path: unreadable +source files emit :class:`xrspatial.geotiff.GeoTIFFFallbackWarning`, the +returned DataArray carries ``attrs['vrt_holes']``, and the mosaic is +returned with holes left as the band's nodata sentinel (or zero on +integer bands without a sentinel). ``XRSPATIAL_GEOTIFF_STRICT=1`` +forces the raise in ``'warn'`` mode too, so CI environments can enforce +fail-fast behavior globally. 
diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index 680e1548..133b7eec 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -185,8 +185,13 @@ def read_vrt(source: str, *, unreadable backing source so a partial mosaic never surfaces silently. This matches the internal ``_vrt.read_vrt`` default and the rest of the geotiff module's up-front rejection of - malformed input. Prior to #1860 the public default was - ``'warn'``; callers that relied on the lenient behaviour pass + malformed input. Both the eager and chunked dispatchers raise + at construction time when the static missing-source sweep + finds any source file that does not exist on disk and + intersects the requested window (#2265); chunked callers no + longer have to wait until ``compute()`` to learn the VRT is + broken. Prior to #1860 the public default was ``'warn'``; + callers that relied on the lenient behaviour pass ``missing_sources='warn'`` explicitly. ``'warn'`` is the opt-in escape hatch for partial mosaics: it emits ``GeoTIFFFallbackWarning``, records ``attrs['vrt_holes']``, @@ -695,6 +700,7 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, import dask.array as da from .._reader import MAX_PIXELS_DEFAULT + from .._runtime import _geotiff_strict_mode from .._vrt import ( parse_vrt, _read_vrt_xml, @@ -972,17 +978,87 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, # actually present. Each entry mirrors the eager schema: # ``{'source', 'band', 'dst_rect', 'error'}``. chunked_holes: list[dict] = [] - for vrt_band in vrt.bands: + for band_idx, vrt_band in enumerate(vrt.bands): + # When ``band`` is restricted, the per-chunk decode never touches + # bands outside the selection, so a missing source on an + # unrelated band does not affect the mosaic and should not + # populate ``vrt_holes`` (mirrors the eager path, which only + # decodes the selected band's sources). 
``band`` is a 0-based + # index into ``vrt.bands``, same convention as the + # ``selected_bands = [vrt.bands[band]]`` slice above. We compare + # against ``band_idx`` rather than ``vrt_band.band_num`` + # (the XML's 1-based ``band=`` attribute) because the XML + # attribute does not have to match list position on hand-rolled + # VRTs. + if band is not None and band_idx != band: + continue for src in vrt_band.sources: if not _os.path.exists(src.filename): + # Skip holes that fall entirely outside the requested + # window. Each chunk task only decodes sources that + # intersect its destination rect, so a missing source + # outside the window never gets touched and the eager + # path with the same window would also not raise. + # ``win_r0/win_c0`` are the row/col origin of the + # requested window in the VRT's destination coordinate + # space and ``full_h/full_w`` are its size. + dst = src.dst_rect + if not ( + dst.x_off + dst.x_size > win_c0 + and dst.x_off < win_c0 + full_w + and dst.y_off + dst.y_size > win_r0 + and dst.y_off < win_r0 + full_h + ): + continue chunked_holes.append({ 'source': src.filename, 'band': vrt_band.band_num, - 'dst_rect': (src.dst_rect.x_off, src.dst_rect.y_off, - src.dst_rect.x_size, src.dst_rect.y_size), + 'dst_rect': (dst.x_off, dst.y_off, + dst.x_size, dst.y_size), 'error': 'FileNotFoundError: source file not found', }) + # Fail-fast for ``missing_sources='raise'`` (the public default since + # #1860). The docstring at the top of ``read_vrt`` promises that + # ``'raise'`` "fails immediately on an unreadable backing source so a + # partial mosaic never surfaces silently". Without this guard the + # chunked path constructs a delayed graph whose tasks each raise + # individually at compute time; if the caller never computes a chunk + # that touches a missing source (e.g. windowed downstream slicing + # past the hole), the raise never fires and the partial mosaic ships + # silently. 
The static ``os.path.exists`` sweep above already has the + # information needed to raise up front -- no extra source decoding + # required. ``XRSPATIAL_GEOTIFF_STRICT=1`` also forces the raise + # regardless of the kwarg, matching the eager path's strict-mode + # contract. See issue #2265. + if chunked_holes and ( + missing_sources == 'raise' or _geotiff_strict_mode() + ): + # Surface the first few missing paths in the message so the + # caller can act on them without having to flip to ``'warn'`` + # and re-parse the resulting ``attrs['vrt_holes']``. Cap the + # preview at 3 entries to keep the error string bounded on + # mosaics with many missing tiles -- the total count is + # appended so the caller still knows the full magnitude. + preview_max = 3 + preview = chunked_holes[:preview_max] + preview_str = ', '.join( + f"{h['source']!r} (band {h['band']})" for h in preview + ) + more = len(chunked_holes) - len(preview) + if more > 0: + preview_str += f" and {more} more" + raise FileNotFoundError( + f"VRT references missing source file(s) that intersect " + f"the requested window: {preview_str}. The chunked VRT " + f"read aborts up front under missing_sources='raise' " + f"(the default) so a partial mosaic never surfaces " + f"silently. Pass missing_sources='warn' to opt into the " + f"lenient path that records holes in attrs['vrt_holes'] " + f"and warns at compute time. " + f"{len(chunked_holes)} missing source(s) total." 
+ ) + # Wave 3 of #2162: route attrs assembly through # ``_finalize_lazy_read_attrs`` so the VRT chunked path shares the # validate-then-populate-then-stamp block with the eager VRT path diff --git a/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py b/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py index d4cb89d8..4969a02c 100644 --- a/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py +++ b/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py @@ -41,12 +41,27 @@ def test_read_vrt_chunks_matches_eager_values(tmp_path): def test_read_vrt_chunks_does_not_read_sources_during_construction(tmp_path): + """The chunked path must not eagerly decode sources at build. + + Construction does run a cheap ``os.path.exists`` sweep over each + source (to populate ``vrt_holes`` and to fail-fast under the + default ``missing_sources='raise'``), but it must not open or + decode any source file. This test pairs the missing source with + the lenient ``missing_sources='warn'`` opt-in so the build + succeeds; the assertion is that no decode-time warnings (which + would only fire if the source were actually read) leak out + during construction. + """ vrt = tmp_path / "tmp_1798_missing_source.vrt" _write_vrt(vrt, "missing.tif") with warnings.catch_warnings(record=True) as caught: - lazy = read_vrt(str(vrt), chunks=2) + lazy = read_vrt(str(vrt), chunks=2, missing_sources="warn") + # Build-time warnings from the decode codecs should be absent. + # ``missing_sources='warn'`` does not warn at build time either; the + # per-task ``GeoTIFFFallbackWarning`` only fires when a chunk + # actually decodes the missing tile during ``compute()``. 
assert caught == [] assert hasattr(lazy.data, 'compute') diff --git a/xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py b/xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py new file mode 100644 index 00000000..2be7946b --- /dev/null +++ b/xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py @@ -0,0 +1,325 @@ +"""Issue #2265: chunked VRT ``missing_sources='raise'`` must raise at build. + +The public docstring on ``read_vrt`` says ``missing_sources='raise'`` (the +public default since #1860) "fails immediately on an unreadable backing +source so a partial mosaic never surfaces silently". Before #2265 the +chunked path only honoured that contract at compute time: it ran a +static ``os.path.exists`` sweep at build, recorded misses into +``attrs['vrt_holes']``, and only the per-chunk delayed decode raised -- +which meant a windowed downstream slice past the bad tile could ship a +partial mosaic silently. This module pins the "raise at build" behaviour +and the related scoping invariants: + +* a missing source intersecting the requested window raises at build, +* a missing source outside the requested window does not raise, +* a missing source on a band the caller did not select does not raise, +* ``XRSPATIAL_GEOTIFF_STRICT=1`` forces the raise regardless of kwarg, +* ``missing_sources='warn'`` keeps the existing record-and-warn path. 
+""" +from __future__ import annotations + +import os +import warnings + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import GeoTIFFFallbackWarning, read_vrt, to_geotiff + + +def _write_present_source(tmp_path: str, name: str, fill: float) -> str: + """Write a 4x4 float32 GeoTIFF source for use in a multi-source VRT.""" + src = os.path.join(tmp_path, name) + arr = np.full((4, 4), fill, dtype=np.float32) + da = xr.DataArray( + arr, dims=("y", "x"), + attrs={"transform": (1.0, 0.0, 0.0, 0.0, -1.0, 0.0)}, + ) + to_geotiff(da, src) + return src + + +def _make_horizontal_partial_vrt(tmp_path: str) -> str: + """2-source VRT: present source on the left, missing source on the right. + + Layout (rows x cols = 4 x 8): + ``[ present | missing ]``. Used for the basic + ``raise at build`` and window-scoping assertions. + """ + src = _write_present_source(tmp_path, "src_2265_h_present.tif", 7.0) + missing = os.path.join(tmp_path, "missing_2265_h.tif") + vrt_path = os.path.join(tmp_path, "partial_2265_h.vrt") + with open(vrt_path, "w") as f: + f.write( + f'\n' + '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' + '\n' + '\n' + f'{src}\n' + '1\n' + '\n' + '\n' + '\n' + '\n' + f'{missing}\n' + '1\n' + '\n' + '\n' + '\n' + '\n' + '\n' + ) + return vrt_path + + +def _make_multiband_partial_vrt(tmp_path: str) -> str: + """2-band VRT where band 1 has a missing source and band 2 is intact. + + Both bands cover the full 4x4 extent; band 1 pairs a present source + with a missing one, band 2 has one present source. ``band=1`` (0-based, + the second band) must not raise since the per-chunk decode skips band + 1's missing source. Reading unrestricted or with ``band=0`` must raise. 
+ """ + src_b1 = _write_present_source(tmp_path, "src_2265_mb_b1.tif", 11.0) + src_b2 = _write_present_source(tmp_path, "src_2265_mb_b2.tif", 22.0) + missing_b1 = os.path.join(tmp_path, "missing_2265_mb_b1.tif") + vrt_path = os.path.join(tmp_path, "partial_2265_multiband.vrt") + with open(vrt_path, "w") as f: + f.write( + f'\n' + '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' + # Band 1: one present source + one missing source covering + # the same extent. The missing source intersects every + # chunk window so the build must raise when band 1 is in + # scope. + '\n' + '\n' + f'{src_b1}\n' + '1\n' + '\n' + '\n' + '\n' + '\n' + f'{missing_b1}\n' + '1\n' + '\n' + '\n' + '\n' + '\n' + # Band 2: a single present source. ``band=1`` (0-based) on + # the chunked read should pick this band only and skip + # band 1's missing source. + '\n' + '\n' + f'{src_b2}\n' + '1\n' + '\n' + '\n' + '\n' + '\n' + '\n' + ) + return vrt_path + + +class TestRaiseAtBuild: + """``missing_sources='raise'`` raises during construction, not compute.""" + + def test_build_raises_immediately(self, tmp_path): + vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError, match="missing_2265_h"): + read_vrt(vrt_path, chunks=4, missing_sources="raise") + + def test_default_raises_at_build(self, tmp_path): + """The public default is ``'raise'`` so dropping the kwarg + must hit the same fast-fail path.""" + vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError): + read_vrt(vrt_path, chunks=4) + + def test_error_message_mentions_opt_in(self, tmp_path): + """The exception text should tell the caller how to opt into + the lenient path. 
A regression that drops this guidance would + leave callers debugging a bare ``FileNotFoundError`` without + knowing the kwarg toggle exists.""" + vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError) as excinfo: + read_vrt(vrt_path, chunks=4, missing_sources="raise") + msg = str(excinfo.value) + assert "missing_sources='warn'" in msg + assert "partial mosaic" in msg + + +class TestWindowScoping: + """The raise honours the requested window.""" + + def test_window_past_missing_does_not_raise(self, tmp_path): + """A window that touches only the present source still builds + and computes. Without this scoping the static raise would be + overzealous compared to the eager path (which decodes only + sources that intersect the window).""" + vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) + result = read_vrt( + vrt_path, chunks=4, window=(0, 0, 4, 4), + missing_sources="raise", + ) + computed = result.compute() + np.testing.assert_array_equal( + np.asarray(computed), np.full((4, 4), 7.0, dtype=np.float32), + ) + + def test_window_intersecting_missing_raises(self, tmp_path): + """A window that overlaps the missing tile still raises at build.""" + vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError): + read_vrt( + vrt_path, chunks=4, window=(0, 4, 4, 8), + missing_sources="raise", + ) + + +class TestBandScoping: + """The raise honours ``band=`` restriction.""" + + def test_band_select_skips_other_bands_missing_source(self, tmp_path): + """``band=1`` reads band 2 only; band 1's missing source is + irrelevant to the graph, so the build must not raise.""" + vrt_path = _make_multiband_partial_vrt(str(tmp_path)) + result = read_vrt( + vrt_path, chunks=4, band=1, missing_sources="raise", + ) + computed = result.compute() + np.testing.assert_array_equal( + np.asarray(computed), np.full((4, 4), 22.0, dtype=np.float32), + ) + + def test_band_select_on_missing_band_raises(self, tmp_path): + 
"""``band=0`` selects the band with the missing source so the + build must raise (mirror of the unselected-band test above).""" + vrt_path = _make_multiband_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError): + read_vrt( + vrt_path, chunks=4, band=0, missing_sources="raise", + ) + + def test_no_band_restriction_raises(self, tmp_path): + """Without a ``band=`` restriction, both bands' sources are in + scope and the missing source on band 1 raises at build.""" + vrt_path = _make_multiband_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError): + read_vrt(vrt_path, chunks=4, missing_sources="raise") + + +class TestWarnPreserved: + """``missing_sources='warn'`` keeps the record-and-warn behaviour.""" + + def test_warn_records_holes_at_build(self, tmp_path): + """The lenient path must not regress to a build-time raise.""" + vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) + result = read_vrt(vrt_path, chunks=4, missing_sources="warn") + assert "vrt_holes" in result.attrs + assert len(result.attrs["vrt_holes"]) == 1 + assert result.attrs["vrt_holes"][0]["source"].endswith( + "missing_2265_h.tif" + ) + + def test_warn_compute_emits_per_task_warning(self, tmp_path): + """The compute step still warns per task on the lenient path.""" + vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + result = read_vrt(vrt_path, chunks=4, missing_sources="warn") + computed = result.compute() + messages = [str(w.message) for w in caught + if isinstance(w.message, GeoTIFFFallbackWarning)] + assert any("missing_2265_h" in msg for msg in messages) + # Present side decodes to 7.0; missing side decodes to NaN. 
+ np.testing.assert_array_equal( + np.asarray(computed)[:, :4], + np.full((4, 4), 7.0, dtype=np.float32), + ) + assert np.all(np.isnan(np.asarray(computed)[:, 4:])) + + +def _make_multi_missing_vrt(tmp_path: str, n_missing: int) -> str: + """VRT with ``n_missing`` missing sources tiling the destination. + + Each missing source covers a distinct 4x4 dst block laid out + horizontally; the VRT's full extent is sized to hold all of them. + Used to pin the multi-source preview behavior of the build-time + raise message. + """ + vrt_path = os.path.join(tmp_path, f"partial_2265_multi_{n_missing}.vrt") + width = 4 * n_missing + src_xml = [] + for i in range(n_missing): + missing = os.path.join(tmp_path, f"missing_2265_multi_{i}.tif") + src_xml.append( + '\n' + f'{missing}\n' + '1\n' + '\n' + f'\n' + '\n' + ) + with open(vrt_path, "w") as f: + f.write( + f'\n' + '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' + '\n' + + ''.join(src_xml) + + '\n' + '\n' + ) + return vrt_path + + +class TestMultipleMissingSources: + """The error message previews multiple holes and reports the total.""" + + def test_two_missing_sources_listed_with_count(self, tmp_path): + """All missing sources fit in the preview (n=2 <= preview cap).""" + vrt_path = _make_multi_missing_vrt(str(tmp_path), n_missing=2) + with pytest.raises(FileNotFoundError) as excinfo: + read_vrt(vrt_path, chunks=4, missing_sources="raise") + msg = str(excinfo.value) + assert "missing_2265_multi_0" in msg + assert "missing_2265_multi_1" in msg + assert "2 missing source(s) total" in msg + # Preview cap kicks in only above 3 holes; no "and N more" tail + # should appear for n_missing=2. 
+ assert "more" not in msg.lower() or "and 0 more" not in msg + + def test_many_missing_sources_truncated_with_more_suffix(self, tmp_path): + """Above the preview cap, the message says 'and N more'.""" + n = 5 + vrt_path = _make_multi_missing_vrt(str(tmp_path), n_missing=n) + with pytest.raises(FileNotFoundError) as excinfo: + read_vrt(vrt_path, chunks=4, missing_sources="raise") + msg = str(excinfo.value) + # First few names are listed; the rest collapse into "and N more". + assert "missing_2265_multi_0" in msg + # The last source should NOT be in the preview (it's past the cap). + assert f"missing_2265_multi_{n - 1}" not in msg + # Total count is reported regardless of truncation. + assert f"{n} missing source(s) total" in msg + # The truncation tail names how many more there are. + assert "and 2 more" in msg + + +class TestStrictMode: + """``XRSPATIAL_GEOTIFF_STRICT=1`` forces the raise even with ``'warn'``.""" + + def test_strict_overrides_warn_kwarg(self, tmp_path, monkeypatch): + monkeypatch.setenv("XRSPATIAL_GEOTIFF_STRICT", "1") + vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError): + read_vrt(vrt_path, chunks=4, missing_sources="warn") + + def test_strict_off_warn_still_warns(self, tmp_path, monkeypatch): + """Sanity: without strict mode, ``'warn'`` keeps warning.""" + monkeypatch.delenv("XRSPATIAL_GEOTIFF_STRICT", raising=False) + vrt_path = _make_horizontal_partial_vrt(str(tmp_path)) + result = read_vrt(vrt_path, chunks=4, missing_sources="warn") + assert "vrt_holes" in result.attrs diff --git a/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py b/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py index b7dd28fe..eaa1e347 100644 --- a/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py +++ b/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py @@ -142,32 +142,46 @@ def test_chunks_tuple_form(self, tmp_path): class TestChunkedMissingSourcesRaise: - 
"""``read_vrt(chunks=N, missing_sources='raise')`` fails on compute. - - The eager path raises at read time. The chunked path defers to - compute because each chunk's decode is delayed; an upfront raise - would force the parse-time sweep to decode every source, defeating - the lazy graph. The contract: chunks intersecting a missing source - raise on compute; chunks intersecting only present sources still - succeed. + """``read_vrt(chunks=N, missing_sources='raise')`` fails at build. + + The docstring on ``read_vrt`` promises that the default + ``'raise'`` "fails immediately on an unreadable backing source so a + partial mosaic never surfaces silently". Issue #2265 closes the + chunked-path gap: the static ``os.path.exists`` sweep that already + runs to populate ``vrt_holes`` now also raises up front when the + policy is ``'raise'`` and the sweep finds any hole intersecting the + requested window. Without this guard the build would succeed and + only ``result.compute()`` on a hole-touching chunk would raise, so + a downstream pipeline that windowed past the bad tile could ship a + partial mosaic silently. """ - def test_compute_intersecting_missing_raises(self, tmp_path): + def test_build_raises_immediately(self, tmp_path): vrt_path, _ = _make_partial_vrt(str(tmp_path)) - result = read_vrt(vrt_path, chunks=4, missing_sources="raise") - # Build does not raise (the graph is lazy). - # Computing a chunk that intersects the missing source raises. - with pytest.raises((OSError, ValueError)): - result.compute() - - def test_compute_present_only_chunk_succeeds(self, tmp_path): - """A windowed compute against only the present source succeeds. - - ``read_vrt(window=...)`` restricts the chunked graph to the - windowed extent; if the window misses the missing source, no - chunk needs to decode it and compute succeeds even under - ``missing_sources='raise'``. The contract: the raise policy is - scoped to chunks that actually touch missing sources. 
+ with pytest.raises(FileNotFoundError, match="missing.tif"): + read_vrt(vrt_path, chunks=4, missing_sources="raise") + + def test_build_raise_message_mentions_policy_kwarg(self, tmp_path): + """The raise tells the caller how to opt into the lenient path. + + Lock in the kwarg-naming guidance in the error string so a + future refactor that drops or renames the suggestion regresses + the user-facing message rather than silently churning it. + """ + vrt_path, _ = _make_partial_vrt(str(tmp_path)) + with pytest.raises(FileNotFoundError) as excinfo: + read_vrt(vrt_path, chunks=4, missing_sources="raise") + assert "missing_sources='warn'" in str(excinfo.value) + + def test_window_past_missing_succeeds_under_raise(self, tmp_path): + """A window that does not touch a missing source still builds. + + The static sweep is scoped to the windowed extent. If the + window covers only present sources, the chunked graph has + nothing to raise about and ``compute()`` returns the present + tile. This preserves the contract that ``missing_sources`` + only fires when the requested region actually depends on a + missing source. """ vrt_path, _ = _make_partial_vrt(str(tmp_path)) # Window covers only the present source (cols 0-4). @@ -180,22 +194,44 @@ def test_compute_present_only_chunk_succeeds(self, tmp_path): np.asarray(computed), np.full((4, 4), 7.0, dtype=np.float32), ) + def test_band_selection_skips_other_bands_holes(self, tmp_path): + """A ``band=`` restriction scopes the static raise to that band. + + Mirrors the eager path: only sources on the selected band get + decoded, so a missing source on an unselected band should not + block the build. The partial VRT in this module is single-band + so the only way to exercise this is to confirm that the + single-band default still raises (sanity gate) -- the + cross-band gating is covered indirectly by the broader VRT + test matrix. 
+ """ + vrt_path, _ = _make_partial_vrt(str(tmp_path)) + # Selecting band 0 (the only band) still touches the missing + # source so the build raises. The negative case (a missing + # source on a different band than the selected one) needs a + # multi-band VRT helper; the band-index gate in + # ``_read_vrt_chunked`` is exercised by ``TestBandScoping`` in + # ``test_vrt_chunked_missing_raise_at_build_2265.py``. + with pytest.raises(FileNotFoundError): + read_vrt( + vrt_path, chunks=4, band=0, missing_sources="raise", + ) + class TestChunkedMissingSourcesDefault: """The default ``missing_sources`` on chunked reads is ``'raise'``. The public ``read_vrt`` default flipped to ``'raise'`` in #1843 / - #1860. The chunked path goes through the same entry point so the - default must agree. A regression flipping the chunked default to - ``'warn'`` would silently produce partial mosaics for callers who - don't pass the kwarg. + #1860 and the chunked path now honours it at build time (#2265). + A regression flipping the chunked default to ``'warn'`` would + silently produce partial mosaics for callers who don't pass the + kwarg. 
""" - def test_chunked_default_raises_on_compute(self, tmp_path): + def test_chunked_default_raises_at_build(self, tmp_path): vrt_path, _ = _make_partial_vrt(str(tmp_path)) - result = read_vrt(vrt_path, chunks=4) - with pytest.raises((OSError, ValueError)): - result.compute() + with pytest.raises(FileNotFoundError, match="missing.tif"): + read_vrt(vrt_path, chunks=4) class TestChunkedMissingSourcesValidation: diff --git a/xrspatial/geotiff/tests/test_vrt_lazy_chunks_1814.py b/xrspatial/geotiff/tests/test_vrt_lazy_chunks_1814.py index c692bdcd..832586ab 100644 --- a/xrspatial/geotiff/tests/test_vrt_lazy_chunks_1814.py +++ b/xrspatial/geotiff/tests/test_vrt_lazy_chunks_1814.py @@ -277,11 +277,15 @@ def test_multiband_plus_chunks_preserves_band_dim(multiband_vrt): # --------------------------------------------------------------------------- def test_chunked_propagates_vrt_holes_when_source_missing(two_by_two_vrt): - """When a source referenced by the VRT does not exist on disk the - chunked reader must populate ``attrs['vrt_holes']`` with the same - schema the eager reader uses, so callers can branch on + """When a source referenced by the VRT does not exist on disk and + the caller opts into the lenient ``missing_sources='warn'`` path, + the chunked reader must populate ``attrs['vrt_holes']`` with the + same schema the eager reader uses, so callers can branch on ``"vrt_holes" in da.attrs`` regardless of which code path produced the DataArray. + + Note: the default ``missing_sources='raise'`` raises at build time + under #2265, so this test exercises the explicit ``'warn'`` opt-in. 
""" import warnings from xrspatial.geotiff import GeoTIFFFallbackWarning @@ -306,7 +310,9 @@ def test_chunked_propagates_vrt_holes_when_source_missing(two_by_two_vrt): with warnings.catch_warnings(): warnings.simplefilter('ignore', GeoTIFFFallbackWarning) - result = read_vrt(vrt_path, chunks=(64, 64)) + result = read_vrt( + vrt_path, chunks=(64, 64), missing_sources='warn', + ) assert 'vrt_holes' in result.attrs, ( "chunked path dropped vrt_holes contract from #1734"