Skip to content

Commit f1931bb

Browse files
committed
Refactor Rwanda ML: Hardcode tile bounds and remove rasterio for CI stability
1 parent c7ffa9b commit f1931bb

6 files changed

Lines changed: 77 additions & 33 deletions

File tree

fiboa_cli/datasets/rw_rwanda_ml.py

Lines changed: 67 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,84 @@
11
import geopandas as gpd
2-
import rasterio
32
from shapely.geometry import box
4-
53
from .commons.ml_splits import MlSplitsMixin
64
from .rw_rwanda import RwRwandaConverter
75

8-
# All 57 TIFFs in the train folder define the train tile boundaries.
9-
# Fields overlapping any train tile go to train; everything else goes to test.
10-
TRAIN_TIFF_BASE = (
11-
"https://data.source.coop/nasa/rwanda-field-boundary-competition"
12-
"/labels/train/{:02d}.tif"
13-
)
14-
TRAIN_TIFF_COUNT = 57
6+
# Pre-computed WGS84 bounding boxes of all 57 Rwanda train tiles.
7+
# Extracted once from https://data.source.coop/nasa/rwanda-field-boundary-competition/labels/train/
8+
# Eliminates runtime rasterio/HTTP dependency.
9+
TRAIN_TILE_BOUNDS_WGS84 = [
10+
(30.313811, -1.523916, 30.324798, -1.512933), # tile 00
11+
(30.368657, -1.490882, 30.379643, -1.479900), # tile 01
12+
(30.390544, -1.546095, 30.401530, -1.535113), # tile 02
13+
(30.302997, -1.402548, 30.313983, -1.391565), # tile 03
14+
(30.357671, -1.524002, 30.368657, -1.513019), # tile 04
15+
(30.335655, -1.579128, 30.346642, -1.568145), # tile 05
16+
(30.291882, -1.523916, 30.302868, -1.512933), # tile 06
17+
(30.302825, -1.546009, 30.313811, -1.535027), # tile 07
18+
(30.357714, -1.501908, 30.368700, -1.490925), # tile 08
19+
(30.313768, -1.557035, 30.324755, -1.546052), # tile 09
20+
(30.335741, -1.512933, 30.346727, -1.501951), # tile 10
21+
(30.401487, -1.557163, 30.412474, -1.546181), # tile 11
22+
(30.368614, -1.546095, 30.379601, -1.535113), # tile 12
23+
(30.346727, -1.501908, 30.357714, -1.490925), # tile 13
24+
(30.335741, -1.534984, 30.346727, -1.524002), # tile 14
25+
(30.313811, -1.534984, 30.324798, -1.524002), # tile 15
26+
(30.412517, -1.501994, 30.423503, -1.491011), # tile 16
27+
(30.324841, -1.457720, 30.335827, -1.446737), # tile 17
28+
(30.357714, -1.479857, 30.368700, -1.468874), # tile 18
29+
(30.324798, -1.490840, 30.335784, -1.479857), # tile 19
30+
(30.368657, -1.512976, 30.379643, -1.501994), # tile 20
31+
(30.346684, -1.546052, 30.357671, -1.535070), # tile 21
32+
(30.368743, -1.413660, 30.379729, -1.402677), # tile 22
33+
(30.302739, -1.590153, 30.313725, -1.579171), # tile 23
34+
(30.412431, -1.590281, 30.423417, -1.579299), # tile 24
35+
(30.324755, -1.546009, 30.335741, -1.535027), # tile 25
36+
(30.368700, -1.457806, 30.379686, -1.446823), # tile 26
37+
(30.346727, -1.512933, 30.357714, -1.501951), # tile 27
38+
(30.313811, -1.512890, 30.324798, -1.501908), # tile 28
39+
(30.324841, -1.468788, 30.335827, -1.457806), # tile 29
40+
(30.335655, -1.590196, 30.346642, -1.579213), # tile 30
41+
(30.390501, -1.568188, 30.401487, -1.557206), # tile 31
42+
(30.390673, -1.424686, 30.401659, -1.413703), # tile 32
43+
(30.379601, -1.524002, 30.390587, -1.513019), # tile 33
44+
(30.302911, -1.468746, 30.313897, -1.457763), # tile 34
45+
(30.368786, -1.402591, 30.379772, -1.391608), # tile 35
46+
(30.368700, -1.468831, 30.379686, -1.457849), # tile 36
47+
(30.401616, -1.446780, 30.412602, -1.435797), # tile 37
48+
(30.302825, -1.534941, 30.313811, -1.523959), # tile 38
49+
(30.357757, -1.435711, 30.368743, -1.424728), # tile 39
50+
(30.302825, -1.523916, 30.313811, -1.512933), # tile 40
51+
(30.335784, -1.490840, 30.346770, -1.479857), # tile 41
52+
(30.335784, -1.468788, 30.346770, -1.457806), # tile 42
53+
(30.313811, -1.501865, 30.324798, -1.490882), # tile 43
54+
(30.302868, -1.501865, 30.313854, -1.490882), # tile 44
55+
(30.313768, -1.546009, 30.324755, -1.535027), # tile 45
56+
(30.291882, -1.512890, 30.302868, -1.501908), # tile 46
57+
(30.412560, -1.490925, 30.423546, -1.479943), # tile 47
58+
(30.368743, -1.435711, 30.379729, -1.424728), # tile 48
59+
(30.335870, -1.424643, 30.346856, -1.413660), # tile 49
60+
(30.324798, -1.501865, 30.335784, -1.490882), # tile 50
61+
(30.390501, -1.579213, 30.401487, -1.568231), # tile 51
62+
(30.357757, -1.446737, 30.368743, -1.435754), # tile 52
63+
(30.357671, -1.535027, 30.368657, -1.524045), # tile 53
64+
(30.401616, -1.468874, 30.412602, -1.457891), # tile 54
65+
(30.368614, -1.524002, 30.379601, -1.513019), # tile 55
66+
(30.401530, -1.524045, 30.412517, -1.513062), # tile 56
67+
]
1568

1669

1770
def _get_train_tiles():
18-
"""Fetch bounding boxes of all train TIFFs via rasterio range requests.
19-
Only metadata is read; no full image download is needed."""
20-
bounds_list = []
21-
crs = None
22-
for i in range(TRAIN_TIFF_COUNT):
23-
url = TRAIN_TIFF_BASE.format(i)
24-
with rasterio.open(url) as src:
25-
b = src.bounds
26-
if crs is None:
27-
crs = src.crs
28-
bounds_list.append({"geometry": box(b.left, b.bottom, b.right, b.top)})
29-
print(f" Fetched train tile {i + 1}/{TRAIN_TIFF_COUNT}: {url.split('/')[-1]}")
30-
print("All train tile bounds fetched.")
31-
return gpd.GeoDataFrame(bounds_list, crs=crs).to_crs("EPSG:4326")
71+
"""Return GeoDataFrame of train tile bounding boxes in WGS84.
72+
Uses pre-computed bounds - no rasterio or network calls needed."""
73+
geometries = [box(w, s, e, n) for w, s, e, n in TRAIN_TILE_BOUNDS_WGS84]
74+
return gpd.GeoDataFrame({"geometry": geometries}, crs="EPSG:4326")
3275

3376

3477
class RwRwandaMlConverter(MlSplitsMixin, RwRwandaConverter):
35-
3678
def file_migration(self, gdf, path, uri, layer=None):
3779
"""Assign train/test split to each field via spatial join with
38-
train TIFF bounding boxes. No val split for Rwanda."""
39-
print("Fetching train TIFF bounds for spatial join...")
80+
train tile bounding boxes. No val split for Rwanda."""
4081
train_tiles = _get_train_tiles()
41-
4282
gdf = gdf.reset_index(drop=True)
4383
joined = gpd.sjoin(
4484
gdf,
@@ -54,7 +94,5 @@ def file_migration(self, gdf, path, uri, layer=None):
5494
return super().file_migration(gdf, path, uri, layer)
5595

5696
def migrate(self, gdf):
57-
# Assign split from the temp column set in file_migration.
58-
# _source_split is not in self.columns so the framework drops it automatically.
5997
gdf["split"] = gdf["_source_split"].astype(object)
6098
return super().migrate(gdf)

pixi.lock

Lines changed: 9 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ pyproj = ">=3.6,<4.0"
4848
# rar is a bit difficult. vecorel-cli needs it for extracting some rar files in converters
4949
# but needs a rar cmd tool, which we need to install from conda explicitly.
5050
unrar = ">=0.4"
51-
rasterio = ">=1.3"
5251
[tool.pixi.pypi-dependencies]
5352
# Editable install of the project itself
5453
fiboa-cli = {path = ".", editable = true}
-466 Bytes
Binary file not shown.
-466 Bytes
Binary file not shown.

tests/test_convert.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def _input_files(converter, *names):
7474
"jecam": _input_files("jecam", "BD_JECAM_CIRAD_2023_feb.shp"),
7575
"de_fusion_ml": _input_files("de_fusion", "de_test_2019.geojson", "de_train_2018.geojson"),
7676
"za_fusion_ml": _input_files("za_fusion", "za_train_258N.geojson", "za_train_259N.geojson", "za_test_2017.geojson"),
77-
"rw_rwanda_ml": _input_files("rw_rwanda", "rw_rwanda_2021.geojson", "train_tile.tif", "test_tile.tif"),
77+
"rw_rwanda_ml": _input_files("rw_rwanda", "rw_rwanda_2021.geojson"),
7878
}
7979

8080
@mark.parametrize("converter", tests)

0 commit comments

Comments
 (0)