diff --git a/docs/source/reference/geotiff.rst b/docs/source/reference/geotiff.rst
index d0c0bb90..bdae376a 100644
--- a/docs/source/reference/geotiff.rst
+++ b/docs/source/reference/geotiff.rst
@@ -129,9 +129,18 @@ VRT missing sources
===================
``read_vrt`` accepts ``missing_sources='warn'`` or ``'raise'``. The default
-``'warn'`` preserves the historical behavior: unreadable source files emit
-:class:`xrspatial.geotiff.GeoTIFFFallbackWarning`, the returned DataArray
-contains ``attrs['vrt_holes']``, and the mosaic is returned with holes.
-Use ``missing_sources='raise'`` when a partial mosaic should fail the
-pipeline immediately. ``XRSPATIAL_GEOTIFF_STRICT=1`` still raises in
-``'warn'`` mode so CI environments can enforce fail-fast behavior globally.
+``'raise'`` (since #1860) fails the read immediately if any source file
+that the read depends on does not exist on disk. Both the eager and chunked
+dispatchers honour this at construction time -- chunked callers do not
+have to wait until ``compute()`` to learn the VRT is broken (#2265).
+The static missing-source sweep is scoped to the requested ``window=``
+and ``band=`` so a windowed or band-restricted read that does not depend
+on a missing source still succeeds.
+
+Pass ``missing_sources='warn'`` to opt into the lenient path: unreadable
+source files emit :class:`xrspatial.geotiff.GeoTIFFFallbackWarning`, the
+returned DataArray carries ``attrs['vrt_holes']``, and the mosaic is
+returned with holes left as the band's nodata sentinel (or zero on
+integer bands without a sentinel). ``XRSPATIAL_GEOTIFF_STRICT=1``
+forces the raise in ``'warn'`` mode too, so CI environments can enforce
+fail-fast behavior globally.
diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py
index 680e1548..133b7eec 100644
--- a/xrspatial/geotiff/_backends/vrt.py
+++ b/xrspatial/geotiff/_backends/vrt.py
@@ -185,8 +185,13 @@ def read_vrt(source: str, *,
unreadable backing source so a partial mosaic never surfaces
silently. This matches the internal ``_vrt.read_vrt`` default
and the rest of the geotiff module's up-front rejection of
- malformed input. Prior to #1860 the public default was
- ``'warn'``; callers that relied on the lenient behaviour pass
+ malformed input. Both the eager and chunked dispatchers raise
+ at construction time when the static missing-source sweep
+ finds a source file on a selected band that is missing on
+ disk and intersects the requested window (#2265); chunked
+ callers no longer have to wait until ``compute()`` to learn
+ the VRT is broken. Prior to #1860 the public default was ``'warn'``;
+ callers that relied on the lenient behaviour pass
``missing_sources='warn'`` explicitly.
``'warn'`` is the opt-in escape hatch for partial mosaics: it
emits ``GeoTIFFFallbackWarning``, records ``attrs['vrt_holes']``,
@@ -695,6 +700,7 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype,
import dask.array as da
from .._reader import MAX_PIXELS_DEFAULT
+ from .._runtime import _geotiff_strict_mode
from .._vrt import (
parse_vrt,
_read_vrt_xml,
@@ -972,17 +978,87 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype,
# actually present. Each entry mirrors the eager schema:
# ``{'source', 'band', 'dst_rect', 'error'}``.
chunked_holes: list[dict] = []
- for vrt_band in vrt.bands:
+ for band_idx, vrt_band in enumerate(vrt.bands):
+ # When ``band`` is restricted, the per-chunk decode never touches
+ # bands outside the selection, so a missing source on an
+ # unrelated band does not affect the mosaic and should not
+ # populate ``vrt_holes`` (mirrors the eager path, which only
+ # decodes the selected band's sources). ``band`` is a 0-based
+ # index into ``vrt.bands``, same convention as the
+ # ``selected_bands = [vrt.bands[band]]`` slice above. We compare
+ # against ``band_idx`` rather than ``vrt_band.band_num``
+ # (the XML's 1-based ``band=`` attribute) because the XML
+ # attribute does not have to match list position on hand-rolled
+ # VRTs.
+ if band is not None and band_idx != band:
+ continue
for src in vrt_band.sources:
if not _os.path.exists(src.filename):
+ # Skip holes that fall entirely outside the requested
+ # window. Each chunk task only decodes sources that
+ # intersect its destination rect, so a missing source
+ # outside the window never gets touched and the eager
+ # path with the same window would also not raise.
+ # ``win_r0/win_c0`` are the row/col origin of the
+ # requested window in the VRT's destination coordinate
+ # space and ``full_h/full_w`` are its size.
+ dst = src.dst_rect
+ if not (
+ dst.x_off + dst.x_size > win_c0
+ and dst.x_off < win_c0 + full_w
+ and dst.y_off + dst.y_size > win_r0
+ and dst.y_off < win_r0 + full_h
+ ):
+ continue
chunked_holes.append({
'source': src.filename,
'band': vrt_band.band_num,
- 'dst_rect': (src.dst_rect.x_off, src.dst_rect.y_off,
- src.dst_rect.x_size, src.dst_rect.y_size),
+ 'dst_rect': (dst.x_off, dst.y_off,
+ dst.x_size, dst.y_size),
'error': 'FileNotFoundError: source file not found',
})
+ # Fail-fast for ``missing_sources='raise'`` (the public default since
+ # #1860). The docstring at the top of ``read_vrt`` promises that
+ # ``'raise'`` "fails immediately on an unreadable backing source so a
+ # partial mosaic never surfaces silently". Without this guard the
+ # chunked path constructs a delayed graph whose tasks each raise
+ # individually at compute time; if the caller never computes a chunk
+ # that touches a missing source (e.g. windowed downstream slicing
+ # past the hole), the raise never fires and the partial mosaic ships
+ # silently. The static ``os.path.exists`` sweep above already has the
+ # information needed to raise up front -- no extra source decoding
+ # required. ``XRSPATIAL_GEOTIFF_STRICT=1`` also forces the raise
+ # regardless of the kwarg, matching the eager path's strict-mode
+ # contract. See issue #2265.
+ if chunked_holes and (
+ missing_sources == 'raise' or _geotiff_strict_mode()
+ ):
+ # Surface the first few missing paths in the message so the
+ # caller can act on them without having to flip to ``'warn'``
+ # and re-parse the resulting ``attrs['vrt_holes']``. Cap the
+ # preview at 3 entries to keep the error string bounded on
+ # mosaics with many missing tiles -- the total count is
+ # appended so the caller still knows the full magnitude.
+ preview_max = 3
+ preview = chunked_holes[:preview_max]
+ preview_str = ', '.join(
+ f"{h['source']!r} (band {h['band']})" for h in preview
+ )
+ more = len(chunked_holes) - len(preview)
+ if more > 0:
+ preview_str += f" and {more} more"
+ raise FileNotFoundError(
+ f"VRT references missing source file(s) that intersect "
+ f"the requested window: {preview_str}. The chunked VRT "
+ f"read aborts up front under missing_sources='raise' "
+ f"(the default) so a partial mosaic never surfaces "
+ f"silently. Pass missing_sources='warn' to opt into the "
+ f"lenient path that records holes in attrs['vrt_holes'] "
+ f"and warns at compute time. "
+ f"{len(chunked_holes)} missing source(s) total."
+ )
+
# Wave 3 of #2162: route attrs assembly through
# ``_finalize_lazy_read_attrs`` so the VRT chunked path shares the
# validate-then-populate-then-stamp block with the eager VRT path
diff --git a/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py b/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py
index d4cb89d8..4969a02c 100644
--- a/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py
+++ b/xrspatial/geotiff/tests/test_read_vrt_lazy_chunks_1798.py
@@ -41,12 +41,27 @@ def test_read_vrt_chunks_matches_eager_values(tmp_path):
def test_read_vrt_chunks_does_not_read_sources_during_construction(tmp_path):
+ """The chunked path must not eagerly decode sources at build.
+
+ Construction does run a cheap ``os.path.exists`` sweep over each
+ source (to populate ``vrt_holes`` and to fail-fast under the
+ default ``missing_sources='raise'``), but it must not open or
+ decode any source file. This test pairs the missing source with
+ the lenient ``missing_sources='warn'`` opt-in so the build
+ succeeds; the assertion is that no decode-time warnings (which
+ would only fire if the source were actually read) leak out
+ during construction.
+ """
vrt = tmp_path / "tmp_1798_missing_source.vrt"
_write_vrt(vrt, "missing.tif")
with warnings.catch_warnings(record=True) as caught:
- lazy = read_vrt(str(vrt), chunks=2)
+ lazy = read_vrt(str(vrt), chunks=2, missing_sources="warn")
+ # Build-time warnings from the decode codecs should be absent.
+ # ``missing_sources='warn'`` does not warn at build time either; the
+ # per-task ``GeoTIFFFallbackWarning`` only fires when a chunk
+ # actually decodes the missing tile during ``compute()``.
assert caught == []
assert hasattr(lazy.data, 'compute')
diff --git a/xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py b/xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py
new file mode 100644
index 00000000..2be7946b
--- /dev/null
+++ b/xrspatial/geotiff/tests/test_vrt_chunked_missing_raise_at_build_2265.py
@@ -0,0 +1,325 @@
+"""Issue #2265: chunked VRT ``missing_sources='raise'`` must raise at build.
+
+The public docstring on ``read_vrt`` says ``missing_sources='raise'`` (the
+public default since #1860) "fails immediately on an unreadable backing
+source so a partial mosaic never surfaces silently". Before #2265 the
+chunked path only honoured that contract at compute time: it ran a
+static ``os.path.exists`` sweep at build, recorded misses into
+``attrs['vrt_holes']``, and only the per-chunk delayed decode raised --
+which meant a windowed downstream slice past the bad tile could ship a
+partial mosaic silently. This module pins the "raise at build" behaviour
+and the related scoping invariants:
+
+* a missing source intersecting the requested window raises at build,
+* a missing source outside the requested window does not raise,
+* a missing source on a band the caller did not select does not raise,
+* ``XRSPATIAL_GEOTIFF_STRICT=1`` forces the raise regardless of kwarg,
+* ``missing_sources='warn'`` keeps the existing record-and-warn path.
+"""
+from __future__ import annotations
+
+import os
+import warnings
+
+import numpy as np
+import pytest
+import xarray as xr
+
+from xrspatial.geotiff import GeoTIFFFallbackWarning, read_vrt, to_geotiff
+
+
+def _write_present_source(tmp_path: str, name: str, fill: float) -> str:
+ """Write a 4x4 float32 GeoTIFF source for use in a multi-source VRT."""
+ src = os.path.join(tmp_path, name)
+ arr = np.full((4, 4), fill, dtype=np.float32)
+ da = xr.DataArray(
+ arr, dims=("y", "x"),
+ attrs={"transform": (1.0, 0.0, 0.0, 0.0, -1.0, 0.0)},
+ )
+ to_geotiff(da, src)
+ return src
+
+
+def _make_horizontal_partial_vrt(tmp_path: str) -> str:
+ """2-source VRT: present source on the left, missing source on the right.
+
+ Layout (rows x cols = 4 x 8):
+ ``[ present | missing ]``. Used for the basic
+ ``raise at build`` and window-scoping assertions.
+ """
+ src = _write_present_source(tmp_path, "src_2265_h_present.tif", 7.0)
+ missing = os.path.join(tmp_path, "missing_2265_h.tif")
+ vrt_path = os.path.join(tmp_path, "partial_2265_h.vrt")
+ with open(vrt_path, "w") as f:
+ f.write(
+ f'\n'
+ '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
+ '\n'
+ '\n'
+ f'{src}\n'
+ '1\n'
+ '\n'
+ '\n'
+ '\n'
+ '\n'
+ f'{missing}\n'
+ '1\n'
+ '\n'
+ '\n'
+ '\n'
+ '\n'
+ '\n'
+ )
+ return vrt_path
+
+
+def _make_multiband_partial_vrt(tmp_path: str) -> str:
+ """2-band VRT where band 1 has a missing source and band 2 is intact.
+
+ Full 4x4 extent per band: band 1 (``band=0``) has a present and a
+ missing source, band 2 (``band=1``) one present source. A ``band=1``
+ read must not raise (band 1's missing source is never decoded);
+ reading with no band restriction or with ``band=0`` must raise.
+ """
+ src_b1 = _write_present_source(tmp_path, "src_2265_mb_b1.tif", 11.0)
+ src_b2 = _write_present_source(tmp_path, "src_2265_mb_b2.tif", 22.0)
+ missing_b1 = os.path.join(tmp_path, "missing_2265_mb_b1.tif")
+ vrt_path = os.path.join(tmp_path, "partial_2265_multiband.vrt")
+ with open(vrt_path, "w") as f:
+ f.write(
+ f'\n'
+ '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
+ # Band 1: one present source + one missing source covering
+ # the same extent. The missing source intersects every
+ # chunk window so the build must raise when band 1 is in
+ # scope.
+ '\n'
+ '\n'
+ f'{src_b1}\n'
+ '1\n'
+ '\n'
+ '\n'
+ '\n'
+ '\n'
+ f'{missing_b1}\n'
+ '1\n'
+ '\n'
+ '\n'
+ '\n'
+ '\n'
+ # Band 2: a single present source. ``band=1`` (0-based) on
+ # the chunked read should pick this band only and skip
+ # band 1's missing source.
+ '\n'
+ '\n'
+ f'{src_b2}\n'
+ '1\n'
+ '\n'
+ '\n'
+ '\n'
+ '\n'
+ '\n'
+ )
+ return vrt_path
+
+
+class TestRaiseAtBuild:
+ """``missing_sources='raise'`` raises during construction, not compute."""
+
+ def test_build_raises_immediately(self, tmp_path):
+ vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
+ with pytest.raises(FileNotFoundError, match="missing_2265_h"):
+ read_vrt(vrt_path, chunks=4, missing_sources="raise")
+
+ def test_default_raises_at_build(self, tmp_path):
+ """The public default is ``'raise'`` so dropping the kwarg
+ must hit the same fast-fail path."""
+ vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
+ with pytest.raises(FileNotFoundError):
+ read_vrt(vrt_path, chunks=4)
+
+ def test_error_message_mentions_opt_in(self, tmp_path):
+ """The exception text should tell the caller how to opt into
+ the lenient path. A regression that drops this guidance would
+ leave callers debugging a bare ``FileNotFoundError`` without
+ knowing the kwarg toggle exists."""
+ vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
+ with pytest.raises(FileNotFoundError) as excinfo:
+ read_vrt(vrt_path, chunks=4, missing_sources="raise")
+ msg = str(excinfo.value)
+ assert "missing_sources='warn'" in msg
+ assert "partial mosaic" in msg
+
+
+class TestWindowScoping:
+ """The raise honours the requested window."""
+
+ def test_window_past_missing_does_not_raise(self, tmp_path):
+ """A window that touches only the present source still builds
+ and computes. Without this scoping the static raise would be
+ overzealous compared to the eager path (which decodes only
+ sources that intersect the window)."""
+ vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
+ result = read_vrt(
+ vrt_path, chunks=4, window=(0, 0, 4, 4),
+ missing_sources="raise",
+ )
+ computed = result.compute()
+ np.testing.assert_array_equal(
+ np.asarray(computed), np.full((4, 4), 7.0, dtype=np.float32),
+ )
+
+ def test_window_intersecting_missing_raises(self, tmp_path):
+ """A window that overlaps the missing tile still raises at build."""
+ vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
+ with pytest.raises(FileNotFoundError):
+ read_vrt(
+ vrt_path, chunks=4, window=(0, 4, 4, 8),
+ missing_sources="raise",
+ )
+
+
+class TestBandScoping:
+ """The raise honours ``band=`` restriction."""
+
+ def test_band_select_skips_other_bands_missing_source(self, tmp_path):
+ """``band=1`` reads band 2 only; band 1's missing source is
+ irrelevant to the graph, so the build must not raise."""
+ vrt_path = _make_multiband_partial_vrt(str(tmp_path))
+ result = read_vrt(
+ vrt_path, chunks=4, band=1, missing_sources="raise",
+ )
+ computed = result.compute()
+ np.testing.assert_array_equal(
+ np.asarray(computed), np.full((4, 4), 22.0, dtype=np.float32),
+ )
+
+ def test_band_select_on_missing_band_raises(self, tmp_path):
+ """``band=0`` selects the band with the missing source so the
+ build must raise (mirror of the unselected-band test above)."""
+ vrt_path = _make_multiband_partial_vrt(str(tmp_path))
+ with pytest.raises(FileNotFoundError):
+ read_vrt(
+ vrt_path, chunks=4, band=0, missing_sources="raise",
+ )
+
+ def test_no_band_restriction_raises(self, tmp_path):
+ """Without a ``band=`` restriction, both bands' sources are in
+ scope and the missing source on band 1 raises at build."""
+ vrt_path = _make_multiband_partial_vrt(str(tmp_path))
+ with pytest.raises(FileNotFoundError):
+ read_vrt(vrt_path, chunks=4, missing_sources="raise")
+
+
+class TestWarnPreserved:
+ """``missing_sources='warn'`` keeps the record-and-warn behaviour."""
+
+ def test_warn_records_holes_at_build(self, tmp_path):
+ """The lenient path must not regress to a build-time raise."""
+ vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
+ result = read_vrt(vrt_path, chunks=4, missing_sources="warn")
+ assert "vrt_holes" in result.attrs
+ assert len(result.attrs["vrt_holes"]) == 1
+ assert result.attrs["vrt_holes"][0]["source"].endswith(
+ "missing_2265_h.tif"
+ )
+
+ def test_warn_compute_emits_per_task_warning(self, tmp_path):
+ """The compute step still warns per task on the lenient path."""
+ vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
+ with warnings.catch_warnings(record=True) as caught:
+ warnings.simplefilter("always")
+ result = read_vrt(vrt_path, chunks=4, missing_sources="warn")
+ computed = result.compute()
+ messages = [str(w.message) for w in caught
+ if isinstance(w.message, GeoTIFFFallbackWarning)]
+ assert any("missing_2265_h" in msg for msg in messages)
+ # Present side decodes to 7.0; missing side decodes to NaN.
+ np.testing.assert_array_equal(
+ np.asarray(computed)[:, :4],
+ np.full((4, 4), 7.0, dtype=np.float32),
+ )
+ assert np.all(np.isnan(np.asarray(computed)[:, 4:]))
+
+
+def _make_multi_missing_vrt(tmp_path: str, n_missing: int) -> str:
+ """VRT with ``n_missing`` missing sources tiling the destination.
+
+ Each missing source covers a distinct 4x4 dst block laid out
+ horizontally; the VRT's full extent is sized to hold all of them.
+ Used to pin the multi-source preview behavior of the build-time
+ raise message.
+ """
+ vrt_path = os.path.join(tmp_path, f"partial_2265_multi_{n_missing}.vrt")
+ width = 4 * n_missing
+ src_xml = []
+ for i in range(n_missing):
+ missing = os.path.join(tmp_path, f"missing_2265_multi_{i}.tif")
+ src_xml.append(
+ '\n'
+ f'{missing}\n'
+ '1\n'
+ '\n'
+ f'\n'
+ '\n'
+ )
+ with open(vrt_path, "w") as f:
+ f.write(
+ f'\n'
+ '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
+ '\n'
+ + ''.join(src_xml) +
+ '\n'
+ '\n'
+ )
+ return vrt_path
+
+
+class TestMultipleMissingSources:
+ """The error message previews multiple holes and reports the total."""
+
+ def test_two_missing_sources_listed_with_count(self, tmp_path):
+ """All missing sources fit in the preview (n=2 <= preview cap)."""
+ vrt_path = _make_multi_missing_vrt(str(tmp_path), n_missing=2)
+ with pytest.raises(FileNotFoundError) as excinfo:
+ read_vrt(vrt_path, chunks=4, missing_sources="raise")
+ msg = str(excinfo.value)
+ assert "missing_2265_multi_0" in msg
+ assert "missing_2265_multi_1" in msg
+ assert "2 missing source(s) total" in msg
+ # n_missing=2 is below the preview cap of 3, so the "and N more"
+ # truncation tail that closes the path list must be absent.
+ assert " more. " not in msg
+
+ def test_many_missing_sources_truncated_with_more_suffix(self, tmp_path):
+ """Above the preview cap, the message says 'and N more'."""
+ n = 5
+ vrt_path = _make_multi_missing_vrt(str(tmp_path), n_missing=n)
+ with pytest.raises(FileNotFoundError) as excinfo:
+ read_vrt(vrt_path, chunks=4, missing_sources="raise")
+ msg = str(excinfo.value)
+ # First few names are listed; the rest collapse into "and N more".
+ assert "missing_2265_multi_0" in msg
+ # The last source should NOT be in the preview (it's past the cap).
+ assert f"missing_2265_multi_{n - 1}" not in msg
+ # Total count is reported regardless of truncation.
+ assert f"{n} missing source(s) total" in msg
+ # The truncation tail names how many more there are.
+ assert "and 2 more" in msg
+
+
+class TestStrictMode:
+ """``XRSPATIAL_GEOTIFF_STRICT=1`` forces the raise even with ``'warn'``."""
+
+ def test_strict_overrides_warn_kwarg(self, tmp_path, monkeypatch):
+ monkeypatch.setenv("XRSPATIAL_GEOTIFF_STRICT", "1")
+ vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
+ with pytest.raises(FileNotFoundError):
+ read_vrt(vrt_path, chunks=4, missing_sources="warn")
+
+ def test_strict_off_warn_still_warns(self, tmp_path, monkeypatch):
+ """Sanity: without strict mode, ``'warn'`` builds and records holes."""
+ monkeypatch.delenv("XRSPATIAL_GEOTIFF_STRICT", raising=False)
+ vrt_path = _make_horizontal_partial_vrt(str(tmp_path))
+ result = read_vrt(vrt_path, chunks=4, missing_sources="warn")
+ assert "vrt_holes" in result.attrs
diff --git a/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py b/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py
index b7dd28fe..eaa1e347 100644
--- a/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py
+++ b/xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py
@@ -142,32 +142,46 @@ def test_chunks_tuple_form(self, tmp_path):
class TestChunkedMissingSourcesRaise:
- """``read_vrt(chunks=N, missing_sources='raise')`` fails on compute.
-
- The eager path raises at read time. The chunked path defers to
- compute because each chunk's decode is delayed; an upfront raise
- would force the parse-time sweep to decode every source, defeating
- the lazy graph. The contract: chunks intersecting a missing source
- raise on compute; chunks intersecting only present sources still
- succeed.
+ """``read_vrt(chunks=N, missing_sources='raise')`` fails at build.
+
+ The docstring on ``read_vrt`` promises that the default
+ ``'raise'`` "fails immediately on an unreadable backing source so a
+ partial mosaic never surfaces silently". Issue #2265 closes the
+ chunked-path gap: the static ``os.path.exists`` sweep that already
+ runs to populate ``vrt_holes`` now also raises up front when the
+ policy is ``'raise'`` and the sweep finds any hole intersecting the
+ requested window. Without this guard the build would succeed and
+ only ``result.compute()`` on a hole-touching chunk would raise, so
+ a downstream pipeline that windowed past the bad tile could ship a
+ partial mosaic silently.
"""
- def test_compute_intersecting_missing_raises(self, tmp_path):
+ def test_build_raises_immediately(self, tmp_path):
vrt_path, _ = _make_partial_vrt(str(tmp_path))
- result = read_vrt(vrt_path, chunks=4, missing_sources="raise")
- # Build does not raise (the graph is lazy).
- # Computing a chunk that intersects the missing source raises.
- with pytest.raises((OSError, ValueError)):
- result.compute()
-
- def test_compute_present_only_chunk_succeeds(self, tmp_path):
- """A windowed compute against only the present source succeeds.
-
- ``read_vrt(window=...)`` restricts the chunked graph to the
- windowed extent; if the window misses the missing source, no
- chunk needs to decode it and compute succeeds even under
- ``missing_sources='raise'``. The contract: the raise policy is
- scoped to chunks that actually touch missing sources.
+ with pytest.raises(FileNotFoundError, match="missing.tif"):
+ read_vrt(vrt_path, chunks=4, missing_sources="raise")
+
+ def test_build_raise_message_mentions_policy_kwarg(self, tmp_path):
+ """The raise tells the caller how to opt into the lenient path.
+
+ Lock in the kwarg-naming guidance in the error string so a
+ future refactor that drops or renames the suggestion regresses
+ the user-facing message rather than silently churning it.
+ """
+ vrt_path, _ = _make_partial_vrt(str(tmp_path))
+ with pytest.raises(FileNotFoundError) as excinfo:
+ read_vrt(vrt_path, chunks=4, missing_sources="raise")
+ assert "missing_sources='warn'" in str(excinfo.value)
+
+ def test_window_past_missing_succeeds_under_raise(self, tmp_path):
+ """A window that does not touch a missing source still builds.
+
+ The static sweep is scoped to the windowed extent. If the
+ window covers only present sources, the chunked graph has
+ nothing to raise about and ``compute()`` returns the present
+ tile. This preserves the contract that ``missing_sources``
+ only fires when the requested region actually depends on a
+ missing source.
"""
vrt_path, _ = _make_partial_vrt(str(tmp_path))
# Window covers only the present source (cols 0-4).
@@ -180,22 +194,44 @@ def test_compute_present_only_chunk_succeeds(self, tmp_path):
np.asarray(computed), np.full((4, 4), 7.0, dtype=np.float32),
)
+ def test_band_selection_skips_other_bands_holes(self, tmp_path):
+ """Selecting the band that owns the missing source still raises.
+
+ Sanity gate for the ``band=`` scoping of the static raise: the
+ partial VRT in this module is single-band, so ``band=0`` keeps
+ the missing source in scope and the build must fail. The
+ complementary case -- a missing source on an unselected band
+ must not block the build -- needs a multi-band VRT and lives in
+ ``test_vrt_chunked_missing_raise_at_build_2265.py``
+ (``TestBandScoping``).
+ """
+ vrt_path, _ = _make_partial_vrt(str(tmp_path))
+ # Band 0 is the only band, so the missing source is in scope and
+ # the build raises. The skip-other-bands case needs a multi-band
+ # fixture; the band-index gate in ``_read_vrt_chunked`` (list
+ # position, not the XML ``band=`` attribute) is covered by
+ # ``test_band_select_skips_other_bands_missing_source`` in
+ # ``test_vrt_chunked_missing_raise_at_build_2265.py``.
+ with pytest.raises(FileNotFoundError):
+ read_vrt(
+ vrt_path, chunks=4, band=0, missing_sources="raise",
+ )
+
class TestChunkedMissingSourcesDefault:
"""The default ``missing_sources`` on chunked reads is ``'raise'``.
The public ``read_vrt`` default flipped to ``'raise'`` in #1843 /
- #1860. The chunked path goes through the same entry point so the
- default must agree. A regression flipping the chunked default to
- ``'warn'`` would silently produce partial mosaics for callers who
- don't pass the kwarg.
+ #1860 and the chunked path now honours it at build time (#2265).
+ A regression flipping the chunked default to ``'warn'`` would
+ silently produce partial mosaics for callers who don't pass the
+ kwarg.
"""
- def test_chunked_default_raises_on_compute(self, tmp_path):
+ def test_chunked_default_raises_at_build(self, tmp_path):
vrt_path, _ = _make_partial_vrt(str(tmp_path))
- result = read_vrt(vrt_path, chunks=4)
- with pytest.raises((OSError, ValueError)):
- result.compute()
+ with pytest.raises(FileNotFoundError, match="missing.tif"):
+ read_vrt(vrt_path, chunks=4)
class TestChunkedMissingSourcesValidation:
diff --git a/xrspatial/geotiff/tests/test_vrt_lazy_chunks_1814.py b/xrspatial/geotiff/tests/test_vrt_lazy_chunks_1814.py
index c692bdcd..832586ab 100644
--- a/xrspatial/geotiff/tests/test_vrt_lazy_chunks_1814.py
+++ b/xrspatial/geotiff/tests/test_vrt_lazy_chunks_1814.py
@@ -277,11 +277,15 @@ def test_multiband_plus_chunks_preserves_band_dim(multiband_vrt):
# ---------------------------------------------------------------------------
def test_chunked_propagates_vrt_holes_when_source_missing(two_by_two_vrt):
- """When a source referenced by the VRT does not exist on disk the
- chunked reader must populate ``attrs['vrt_holes']`` with the same
- schema the eager reader uses, so callers can branch on
+ """When a source referenced by the VRT does not exist on disk and
+ the caller opts into the lenient ``missing_sources='warn'`` path,
+ the chunked reader must populate ``attrs['vrt_holes']`` with the
+ same schema the eager reader uses, so callers can branch on
``"vrt_holes" in da.attrs`` regardless of which code path produced
the DataArray.
+
+ Note: the default ``missing_sources='raise'`` raises at build time
+ under #2265, so this test exercises the explicit ``'warn'`` opt-in.
"""
import warnings
from xrspatial.geotiff import GeoTIFFFallbackWarning
@@ -306,7 +310,9 @@ def test_chunked_propagates_vrt_holes_when_source_missing(two_by_two_vrt):
with warnings.catch_warnings():
warnings.simplefilter('ignore', GeoTIFFFallbackWarning)
- result = read_vrt(vrt_path, chunks=(64, 64))
+ result = read_vrt(
+ vrt_path, chunks=(64, 64), missing_sources='warn',
+ )
assert 'vrt_holes' in result.attrs, (
"chunked path dropped vrt_holes contract from #1734"