From 90356d1c08cda881601abda477c7018100835731 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 9 Apr 2026 15:35:29 +0200 Subject: [PATCH 01/42] Add tutorial.open_dataset --- src/parcels/_tutorial.py | 62 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/src/parcels/_tutorial.py b/src/parcels/_tutorial.py index 34409312cc..5c0fd5cac5 100644 --- a/src/parcels/_tutorial.py +++ b/src/parcels/_tutorial.py @@ -4,6 +4,7 @@ import pooch import xarray as xr +import zarr from parcels._v3to4 import patch_dataset_v4_compat @@ -106,6 +107,40 @@ "CROCOidealized_data": ["CROCO_idealized.nc"], } +EXAMPLE_DATA_FILES2: list[str] = [ + "MovingEddies_data/moving_eddiesP.nc", + "MovingEddies_data/moving_eddiesU.nc", + "MovingEddies_data/moving_eddiesV.nc", + "MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc", + "OFAM_example_data/OFAM_simple_U.nc", + "OFAM_example_data/OFAM_simple_V.nc", + "Peninsula_data/peninsulaU.nc", + "Peninsula_data/peninsulaV.nc", + "Peninsula_data/peninsulaP.nc", + "Peninsula_data/peninsulaT.nc", + "GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", + "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", + "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", + "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", + "DecayingMovingEddy_data/decaying_moving_eddyU.nc", + "DecayingMovingEddy_data/decaying_moving_eddyV.nc", + "FESOM_periodic_channel/fesom_channel.nc", + "FESOM_periodic_channel/u.fesom_channel.nc", + "FESOM_periodic_channel/v.fesom_channel.nc", + "FESOM_periodic_channel/w.fesom_channel.nc", + 
"NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4", + "NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4", + "NemoCurvilinear_data/mesh_mask.nc4", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc", + "NemoNorthSeaORCA025-N006_data/coordinates.nc", + # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973 + "SWASH_data/field_00655*.nc", + "WOA_data/woa18_decav_t*_04.nc", + "CROCOidealized_data/CROCO_idealized.nc", +] + def _create_pooch_registry() -> dict[str, None]: """Collapses the mapping of dataset names to filenames into a pooch registry. @@ -135,7 +170,7 @@ def _get_pooch(data_home=None): ) -def list_example_datasets() -> list[str]: +def list_example_datasets(full=False) -> list[str]: # TODO: Remove full flag when migrating to open_dataset """List the available example datasets. Use :func:`download_example_dataset` to download one of the datasets. @@ -145,7 +180,11 @@ def list_example_datasets() -> list[str]: datasets : list of str The names of the available example datasets. 
""" - return list(EXAMPLE_DATA_FILES.keys()) + if full: + return EXAMPLE_DATA_FILES2 + return list( + set(i.split("/")[0] for i in EXAMPLE_DATA_FILES2) + ) # TODO: Update implementation to return full dataset item and not just stem, to be in line with `open_dataset` def download_example_dataset(dataset: str, data_home=None): @@ -186,6 +225,25 @@ def download_example_dataset(dataset: str, data_home=None): return dataset_folder +# Just creating a temp folder to help during the migration +TMP_ZARR_FOLDER = Path("../parcels-data-zarr/data") + + +def open_dataset(dataset: str): + open_dataset_kwargs = dict(decode_timedelta=False, decode_cf=False) + # assert not dataset.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" + dataset_stem, rest = dataset.split("/", maxsplit=1) + folder = download_example_dataset(dataset_stem) + + ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs) + path = TMP_ZARR_FOLDER / f"{dataset}.zip" + path.parent.mkdir(exist_ok=True) + if not path.exists(): + with zarr.storage.ZipStore(path, mode="w") as store: + ds.to_zarr(store) + return xr.open_zarr(path, **open_dataset_kwargs) + + def _v4_compat_patch(fname, action, pup): """ Patch the GlobCurrent example dataset to be compatible with v4. 
From 1c364befcde9377c9061281f2f8c9c679642dacf Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:13:43 +0200 Subject: [PATCH 02/42] Update dataset names for `open_dataset` --- src/parcels/_tutorial.py | 93 +++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 43 deletions(-) diff --git a/src/parcels/_tutorial.py b/src/parcels/_tutorial.py index 5c0fd5cac5..8a6882d115 100644 --- a/src/parcels/_tutorial.py +++ b/src/parcels/_tutorial.py @@ -107,39 +107,43 @@ "CROCOidealized_data": ["CROCO_idealized.nc"], } -EXAMPLE_DATA_FILES2: list[str] = [ - "MovingEddies_data/moving_eddiesP.nc", - "MovingEddies_data/moving_eddiesU.nc", - "MovingEddies_data/moving_eddiesV.nc", - "MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc", - "OFAM_example_data/OFAM_simple_U.nc", - "OFAM_example_data/OFAM_simple_V.nc", - "Peninsula_data/peninsulaU.nc", - "Peninsula_data/peninsulaV.nc", - "Peninsula_data/peninsulaP.nc", - "Peninsula_data/peninsulaT.nc", - "GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", - "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", - "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", - "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", - "DecayingMovingEddy_data/decaying_moving_eddyU.nc", - "DecayingMovingEddy_data/decaying_moving_eddyV.nc", - "FESOM_periodic_channel/fesom_channel.nc", - "FESOM_periodic_channel/u.fesom_channel.nc", - "FESOM_periodic_channel/v.fesom_channel.nc", - "FESOM_periodic_channel/w.fesom_channel.nc", - "NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4", - 
"NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4", - "NemoCurvilinear_data/mesh_mask.nc4", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc", - "NemoNorthSeaORCA025-N006_data/coordinates.nc", +# The first here is a human readable key, the latter the path to load the netcdf data +# (after refactor the latter open path will disappear, and will just be `open_zarr(f'{ds_key}.zip')`) +# fmt: off +DATASET_KEYS_AND_OPEN_PATHS: dict[str, str] = dict([ + ("MovingEddies_data/P", "MovingEddies_data/moving_eddiesP.nc"), + ("MovingEddies_data/U", "MovingEddies_data/moving_eddiesU.nc"), + ("MovingEddies_data/V", "MovingEddies_data/moving_eddiesV.nc"), + ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", "MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"), + ("OFAM_example_data/U", "OFAM_example_data/OFAM_simple_U.nc"), + ("OFAM_example_data/V", "OFAM_example_data/OFAM_simple_V.nc"), + ("Peninsula_data/U", "Peninsula_data/peninsulaU.nc"), + ("Peninsula_data/V", "Peninsula_data/peninsulaV.nc"), + ("Peninsula_data/P", "Peninsula_data/peninsulaP.nc"), + ("Peninsula_data/T", "Peninsula_data/peninsulaT.nc"), + ("GlobCurrent_example_data/data.nc", "GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc"), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", 
"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), + ("DecayingMovingEddy_data/U", "DecayingMovingEddy_data/decaying_moving_eddyU.nc"), + ("DecayingMovingEddy_data/V", "DecayingMovingEddy_data/decaying_moving_eddyV.nc"), + ("FESOM_periodic_channel/fesom_channel", "FESOM_periodic_channel/fesom_channel.nc"), + ("FESOM_periodic_channel/u.fesom_channel", "FESOM_periodic_channel/u.fesom_channel.nc"), + ("FESOM_periodic_channel/v.fesom_channel", "FESOM_periodic_channel/v.fesom_channel.nc"), + ("FESOM_periodic_channel/w.fesom_channel", "FESOM_periodic_channel/w.fesom_channel.nc"), + ("NemoCurvilinear_data_zonal/U", "NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4"), + ("NemoCurvilinear_data_zonal/V", "NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4"), + ("NemoCurvilinear_data_zonal/mesh_mask", "NemoCurvilinear_data/mesh_mask.nc4"), + ("NemoNorthSeaORCA025-N006_data/U", "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc"), + ("NemoNorthSeaORCA025-N006_data/V", "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"), + ("NemoNorthSeaORCA025-N006_data/W", "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"), + ("NemoNorthSeaORCA025-N006_data/mesh_mask", "NemoNorthSeaORCA025-N006_data/coordinates.nc"), # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973 - "SWASH_data/field_00655*.nc", - "WOA_data/woa18_decav_t*_04.nc", - "CROCOidealized_data/CROCO_idealized.nc", -] + ("SWASH_data/data", "SWASH_data/field_00655*.nc"), + ("WOA_data/data", "WOA_data/woa18_decav_t*_04.nc"), + ("CROCOidealized_data/data", "CROCOidealized_data/CROCO_idealized.nc"), +]) +# fmt: on def _create_pooch_registry() -> dict[str, None]: @@ -170,7 +174,7 @@ def _get_pooch(data_home=None): ) -def list_example_datasets(full=False) -> list[str]: # TODO: Remove 
full flag when migrating to open_dataset +def list_example_datasets(v4=False) -> list[str]: # TODO: Remove v4 flag when migrating to open_dataset """List the available example datasets. Use :func:`download_example_dataset` to download one of the datasets. @@ -180,11 +184,9 @@ def list_example_datasets(full=False) -> list[str]: # TODO: Remove full flag wh datasets : list of str The names of the available example datasets. """ - if full: - return EXAMPLE_DATA_FILES2 - return list( - set(i.split("/")[0] for i in EXAMPLE_DATA_FILES2) - ) # TODO: Update implementation to return full dataset item and not just stem, to be in line with `open_dataset` + if v4: + return list(DATASET_KEYS_AND_OPEN_PATHS.keys()) + return list(set(v.split("/")[0] for v in DATASET_KEYS_AND_OPEN_PATHS.values())) def download_example_dataset(dataset: str, data_home=None): @@ -226,17 +228,22 @@ def download_example_dataset(dataset: str, data_home=None): # Just creating a temp folder to help during the migration -TMP_ZARR_FOLDER = Path("../parcels-data-zarr/data") +TMP_ZARR_FOLDER = Path("../parcels-data/data-zarr") -def open_dataset(dataset: str): +def open_dataset(name: str): + if name not in DATASET_KEYS_AND_OPEN_PATHS: + raise ValueError( + f"Dataset {name!r} not found. 
Available datasets are: " + ", ".join(list_example_datasets(v4=True)) + ) + open_dataset_kwargs = dict(decode_timedelta=False, decode_cf=False) # assert not dataset.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" - dataset_stem, rest = dataset.split("/", maxsplit=1) - folder = download_example_dataset(dataset_stem) + download_dataset_stem, rest = DATASET_KEYS_AND_OPEN_PATHS[name].split("/", maxsplit=1) + folder = download_example_dataset(download_dataset_stem) ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs) - path = TMP_ZARR_FOLDER / f"{dataset}.zip" + path = TMP_ZARR_FOLDER / f"{name}.zip" path.parent.mkdir(exist_ok=True) if not path.exists(): with zarr.storage.ZipStore(path, mode="w") as store: From f5cbc2565209904d241d9e607a6e194a365966c8 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:14:06 +0200 Subject: [PATCH 03/42] Port accross tutorial util tests --- .../tools/test_exampledata_utils.py => tests/test_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename tests-v3/tools/test_exampledata_utils.py => tests/test_tutorial.py (96%) diff --git a/tests-v3/tools/test_exampledata_utils.py b/tests/test_tutorial.py similarity index 96% rename from tests-v3/tools/test_exampledata_utils.py rename to tests/test_tutorial.py index 94ed9cf833..3169a07c2e 100644 --- a/tests-v3/tools/test_exampledata_utils.py +++ b/tests/test_tutorial.py @@ -1,7 +1,7 @@ import pytest import requests -from parcels.tools.exampledata_utils import ( +from parcels._tutorial import ( _get_pooch, download_example_dataset, list_example_datasets, From e5a85c4d30ec85c314c570575a121c3264aedf3c Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:18:01 +0200 Subject: [PATCH 04/42] Add test_open_dataset --- tests/test_tutorial.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_tutorial.py 
b/tests/test_tutorial.py index 3169a07c2e..f603899308 100644 --- a/tests/test_tutorial.py +++ b/tests/test_tutorial.py @@ -1,10 +1,12 @@ import pytest import requests +import xarray as xr from parcels._tutorial import ( _get_pooch, download_example_dataset, list_example_datasets, + open_dataset, ) @@ -35,3 +37,9 @@ def test_download_example_dataset_no_data_home(): dataset_folder_path = download_example_dataset(dataset) assert dataset_folder_path.exists() assert dataset_folder_path.name == dataset + + +@pytest.mark.parametrize("name", list_example_datasets(v4=True)) +def test_open_dataset(name): + ds = open_dataset(name) + assert isinstance(ds, xr.Dataset) From 9ef869a523f0b44fe7c34dcecd5a600d03ffa261 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:24:56 +0200 Subject: [PATCH 05/42] Silence future warning --- src/parcels/_tutorial.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parcels/_tutorial.py b/src/parcels/_tutorial.py index 8a6882d115..f82d8675e5 100644 --- a/src/parcels/_tutorial.py +++ b/src/parcels/_tutorial.py @@ -242,7 +242,8 @@ def open_dataset(name: str): download_dataset_stem, rest = DATASET_KEYS_AND_OPEN_PATHS[name].split("/", maxsplit=1) folder = download_example_dataset(download_dataset_stem) - ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs) + with xr.set_options(use_new_combine_kwarg_defaults=True): + ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs) path = TMP_ZARR_FOLDER / f"{name}.zip" path.parent.mkdir(exist_ok=True) if not path.exists(): From 0f6a8c9e160f3931f45cbcf792e08bdb91d7cb8e Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:01:45 +0200 Subject: [PATCH 06/42] Make variables private --- src/parcels/_tutorial.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/parcels/_tutorial.py 
b/src/parcels/_tutorial.py index f82d8675e5..56ff0531bd 100644 --- a/src/parcels/_tutorial.py +++ b/src/parcels/_tutorial.py @@ -12,9 +12,9 @@ # When modifying existing datasets in a backwards incompatible way, # make a new release in the repo and update the DATA_REPO_TAG to the new tag -DATA_REPO_TAG = "main" +_DATA_REPO_TAG = "main" -DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{DATA_REPO_TAG}/data" +_DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{_DATA_REPO_TAG}/data" # Keys are the dataset names. Values are the filenames in the dataset folder. Note that # you can specify subfolders in the dataset folder putting slashes in the filename list. @@ -28,7 +28,7 @@ # └── file2.nc # # See instructions at https://github.com/Parcels-code/parcels-data for adding new datasets -EXAMPLE_DATA_FILES: dict[str, list[str]] = { +_EXAMPLE_DATA_FILES: dict[str, list[str]] = { "MovingEddies_data": [ "moving_eddiesP.nc", "moving_eddiesU.nc", @@ -110,7 +110,7 @@ # The first here is a human readable key, the latter the path to load the netcdf data # (after refactor the latter open path will disappear, and will just be `open_zarr(f'{ds_key}.zip')`) # fmt: off -DATASET_KEYS_AND_OPEN_PATHS: dict[str, str] = dict([ +_DATASET_KEYS_AND_OPEN_PATHS: dict[str, str] = dict([ ("MovingEddies_data/P", "MovingEddies_data/moving_eddiesP.nc"), ("MovingEddies_data/U", "MovingEddies_data/moving_eddiesU.nc"), ("MovingEddies_data/V", "MovingEddies_data/moving_eddiesV.nc"), @@ -152,7 +152,7 @@ def _create_pooch_registry() -> dict[str, None]: Hashes are set to None for all files. 
""" registry: dict[str, None] = {} - for dataset, filenames in EXAMPLE_DATA_FILES.items(): + for dataset, filenames in _EXAMPLE_DATA_FILES.items(): for filename in filenames: registry[f"{dataset}/{filename}"] = None return registry @@ -169,7 +169,7 @@ def _get_pooch(data_home=None): return pooch.create( path=data_home, - base_url=DATA_URL, + base_url=_DATA_URL, registry=POOCH_REGISTRY, ) @@ -185,8 +185,8 @@ def list_example_datasets(v4=False) -> list[str]: # TODO: Remove v4 flag when m The names of the available example datasets. """ if v4: - return list(DATASET_KEYS_AND_OPEN_PATHS.keys()) - return list(set(v.split("/")[0] for v in DATASET_KEYS_AND_OPEN_PATHS.values())) + return list(_DATASET_KEYS_AND_OPEN_PATHS.keys()) + return list(set(v.split("/")[0] for v in _DATASET_KEYS_AND_OPEN_PATHS.values())) def download_example_dataset(dataset: str, data_home=None): @@ -210,9 +210,9 @@ def download_example_dataset(dataset: str, data_home=None): Path to the folder containing the downloaded dataset files. """ # Dev note: `dataset` is assumed to be a folder name with netcdf files - if dataset not in EXAMPLE_DATA_FILES: + if dataset not in _EXAMPLE_DATA_FILES: raise ValueError( - f"Dataset {dataset!r} not found. Available datasets are: " + ", ".join(EXAMPLE_DATA_FILES.keys()) + f"Dataset {dataset!r} not found. Available datasets are: " + ", ".join(_EXAMPLE_DATA_FILES.keys()) ) odie = _get_pooch(data_home=data_home) @@ -228,23 +228,23 @@ def download_example_dataset(dataset: str, data_home=None): # Just creating a temp folder to help during the migration -TMP_ZARR_FOLDER = Path("../parcels-data/data-zarr") +_TMP_ZARR_FOLDER = Path("../parcels-data/data-zarr") def open_dataset(name: str): - if name not in DATASET_KEYS_AND_OPEN_PATHS: + if name not in _DATASET_KEYS_AND_OPEN_PATHS: raise ValueError( f"Dataset {name!r} not found. 
Available datasets are: " + ", ".join(list_example_datasets(v4=True)) ) open_dataset_kwargs = dict(decode_timedelta=False, decode_cf=False) # assert not dataset.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" - download_dataset_stem, rest = DATASET_KEYS_AND_OPEN_PATHS[name].split("/", maxsplit=1) + download_dataset_stem, rest = _DATASET_KEYS_AND_OPEN_PATHS[name].split("/", maxsplit=1) folder = download_example_dataset(download_dataset_stem) with xr.set_options(use_new_combine_kwarg_defaults=True): ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs) - path = TMP_ZARR_FOLDER / f"{name}.zip" + path = _TMP_ZARR_FOLDER / f"{name}.zip" path.parent.mkdir(exist_ok=True) if not path.exists(): with zarr.storage.ZipStore(path, mode="w") as store: From 970968133f35982c50a74b5bf3791004bb07da4e Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:02:02 +0200 Subject: [PATCH 07/42] Make parcels.tutorial public --- src/parcels/__init__.py | 2 +- src/parcels/{_tutorial.py => tutorial.py} | 0 tests/test_index_search.py | 2 +- tests/test_tutorial.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename src/parcels/{_tutorial.py => tutorial.py} (100%) diff --git a/src/parcels/__init__.py b/src/parcels/__init__.py index c13850a333..7d12abf5ed 100644 --- a/src/parcels/__init__.py +++ b/src/parcels/__init__.py @@ -38,7 +38,7 @@ ParticleSetWarning, ) from parcels._logger import logger -from parcels._tutorial import download_example_dataset, list_example_datasets +from parcels.tutorial import download_example_dataset, list_example_datasets __all__ = [ # noqa: RUF022 # Core classes diff --git a/src/parcels/_tutorial.py b/src/parcels/tutorial.py similarity index 100% rename from src/parcels/_tutorial.py rename to src/parcels/tutorial.py diff --git a/tests/test_index_search.py b/tests/test_index_search.py index 6f20ba1e93..27e8ba04c2 100644 --- a/tests/test_index_search.py +++ 
b/tests/test_index_search.py @@ -6,8 +6,8 @@ from parcels import Field, XGrid from parcels._core.index_search import _search_indices_curvilinear_2d from parcels._datasets.structured.generic import datasets -from parcels._tutorial import download_example_dataset from parcels.interpolators import XLinear +from parcels.tutorial import download_example_dataset @pytest.fixture diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py index f603899308..4ab1636c2b 100644 --- a/tests/test_tutorial.py +++ b/tests/test_tutorial.py @@ -2,7 +2,7 @@ import requests import xarray as xr -from parcels._tutorial import ( +from parcels.tutorial import ( _get_pooch, download_example_dataset, list_example_datasets, From e4247557e5ca8dcbc5ab21a4b87506be331c6053 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:21:38 +0200 Subject: [PATCH 08/42] Remove `download_example_dataset` and `list_example_datasets` from root API Users must now do `import parcels.tutorial` --- docs/getting_started/tutorial_output.ipynb | 2 +- docs/getting_started/tutorial_quickstart.md | 3 ++- docs/user_guide/examples/explanation_kernelloop.md | 3 ++- docs/user_guide/examples/tutorial_Argofloats.ipynb | 3 ++- docs/user_guide/examples/tutorial_croco_3D.ipynb | 3 ++- docs/user_guide/examples/tutorial_delaystart.ipynb | 3 ++- docs/user_guide/examples/tutorial_diffusion.ipynb | 2 +- docs/user_guide/examples/tutorial_dt_integrators.ipynb | 3 ++- docs/user_guide/examples/tutorial_gsw_density.ipynb | 3 ++- .../examples/tutorial_manipulating_field_data.ipynb | 3 ++- docs/user_guide/examples/tutorial_mitgcm.ipynb | 3 ++- docs/user_guide/examples/tutorial_nemo.ipynb | 4 ++-- docs/user_guide/examples/tutorial_sampling.ipynb | 2 +- src/parcels/__init__.py | 1 - tests/test_advection.py | 7 ++++--- tests/test_convert.py | 7 ++++--- tests/test_particlefile.py | 4 ++-- tests/test_sigmagrids.py | 5 +++-- tests/test_uxarray_fieldset.py | 4 ++-- 19 files changed, 38 
insertions(+), 27 deletions(-) diff --git a/docs/getting_started/tutorial_output.ipynb b/docs/getting_started/tutorial_output.ipynb index 485c3c0800..3b9eedec56 100644 --- a/docs/getting_started/tutorial_output.ipynb +++ b/docs/getting_started/tutorial_output.ipynb @@ -52,7 +52,7 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", + "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", " \"CopernicusMarine_data_for_Argo_tutorial\"\n", ")\n", "\n", diff --git a/docs/getting_started/tutorial_quickstart.md b/docs/getting_started/tutorial_quickstart.md index 60d5ced099..c806311e2c 100644 --- a/docs/getting_started/tutorial_quickstart.md +++ b/docs/getting_started/tutorial_quickstart.md @@ -20,6 +20,7 @@ and writing output files that can be read with xarray. import numpy as np import xarray as xr import parcels +import parcels.tutorial ``` ## Input flow fields: `FieldSet` @@ -29,7 +30,7 @@ hydrodynamics fields in which the particles are tracked. Here we provide an exam [Global Ocean Physics Reanalysis](https://doi.org/10.48670/moi-00021) from the Copernicus Marine Service. 
```{code-cell} -example_dataset_folder = parcels.download_example_dataset( +example_dataset_folder = parcels.tutorial.download_example_dataset( "CopernicusMarine_data_for_Argo_tutorial" ) diff --git a/docs/user_guide/examples/explanation_kernelloop.md b/docs/user_guide/examples/explanation_kernelloop.md index bb2d20743b..582be17ed7 100644 --- a/docs/user_guide/examples/explanation_kernelloop.md +++ b/docs/user_guide/examples/explanation_kernelloop.md @@ -53,9 +53,10 @@ import numpy as np import xarray as xr import parcels +import parcels.tutorial # Load the CopernicusMarine data in the Agulhas region from the example_datasets -example_dataset_folder = parcels.download_example_dataset( +example_dataset_folder = parcels.tutorial.download_example_dataset( "CopernicusMarine_data_for_Argo_tutorial" ) diff --git a/docs/user_guide/examples/tutorial_Argofloats.ipynb b/docs/user_guide/examples/tutorial_Argofloats.ipynb index 0a37193ce7..d0b487a24b 100644 --- a/docs/user_guide/examples/tutorial_Argofloats.ipynb +++ b/docs/user_guide/examples/tutorial_Argofloats.ipynb @@ -110,9 +110,10 @@ "import xarray as xr\n", "\n", "import parcels\n", + "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", + "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", " \"CopernicusMarine_data_for_Argo_tutorial\"\n", ")\n", "\n", diff --git a/docs/user_guide/examples/tutorial_croco_3D.ipynb b/docs/user_guide/examples/tutorial_croco_3D.ipynb index a84b0cc742..515a5cde08 100644 --- a/docs/user_guide/examples/tutorial_croco_3D.ipynb +++ b/docs/user_guide/examples/tutorial_croco_3D.ipynb @@ -39,8 +39,9 @@ "import xarray as xr\n", "\n", "import parcels\n", + "import parcels.tutorial\n", "\n", - "data_folder = parcels.download_example_dataset(\"CROCOidealized_data\")\n", + "data_folder = 
parcels.tutorial.download_example_dataset(\"CROCOidealized_data\")\n", "ds_fields = xr.open_dataset(data_folder / \"CROCO_idealized.nc\")\n", "\n", "ds_fields.load(); # Preload data to speed up access" diff --git a/docs/user_guide/examples/tutorial_delaystart.ipynb b/docs/user_guide/examples/tutorial_delaystart.ipynb index 03bdb6a589..5ca66f4cd9 100644 --- a/docs/user_guide/examples/tutorial_delaystart.ipynb +++ b/docs/user_guide/examples/tutorial_delaystart.ipynb @@ -30,6 +30,7 @@ "from matplotlib.animation import FuncAnimation\n", "\n", "import parcels\n", + "import parcels.tutorial\n", "\n", "# for interactive display of animations\n", "plt.rcParams[\"animation.html\"] = \"jshtml\"" @@ -50,7 +51,7 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", + "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", " \"CopernicusMarine_data_for_Argo_tutorial\"\n", ")\n", "\n", diff --git a/docs/user_guide/examples/tutorial_diffusion.ipynb b/docs/user_guide/examples/tutorial_diffusion.ipynb index 6e42b4ce91..992dcdfa55 100644 --- a/docs/user_guide/examples/tutorial_diffusion.ipynb +++ b/docs/user_guide/examples/tutorial_diffusion.ipynb @@ -467,7 +467,7 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", + "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", " \"CopernicusMarine_data_for_Argo_tutorial\"\n", ")\n", "\n", diff --git a/docs/user_guide/examples/tutorial_dt_integrators.ipynb b/docs/user_guide/examples/tutorial_dt_integrators.ipynb index 2ecf7f81af..3ac6be4e13 100644 --- a/docs/user_guide/examples/tutorial_dt_integrators.ipynb +++ b/docs/user_guide/examples/tutorial_dt_integrators.ipynb @@ -59,9 +59,10 @@ "import xarray as xr\n", "\n", "import parcels\n", + "import 
parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", + "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", " \"CopernicusMarine_data_for_Argo_tutorial\"\n", ")\n", "\n", diff --git a/docs/user_guide/examples/tutorial_gsw_density.ipynb b/docs/user_guide/examples/tutorial_gsw_density.ipynb index 44b5928a7a..22bbba4ed3 100644 --- a/docs/user_guide/examples/tutorial_gsw_density.ipynb +++ b/docs/user_guide/examples/tutorial_gsw_density.ipynb @@ -36,9 +36,10 @@ "import xarray as xr\n", "\n", "import parcels\n", + "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", + "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", " \"CopernicusMarine_data_for_Argo_tutorial\"\n", ")\n", "\n", diff --git a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb index f5dc55571b..dc17a0d6d0 100644 --- a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb +++ b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb @@ -43,9 +43,10 @@ "import xarray as xr\n", "\n", "import parcels\n", + "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", + "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", " \"CopernicusMarine_data_for_Argo_tutorial\"\n", ")\n", "\n", diff --git a/docs/user_guide/examples/tutorial_mitgcm.ipynb b/docs/user_guide/examples/tutorial_mitgcm.ipynb index e94a0ea885..845d15e361 100644 --- a/docs/user_guide/examples/tutorial_mitgcm.ipynb +++ b/docs/user_guide/examples/tutorial_mitgcm.ipynb @@ -28,8 +28,9 @@ "import xarray as xr\n", 
"\n", "import parcels\n", + "import parcels.tutorial\n", "\n", - "data_folder = parcels.download_example_dataset(\"MITgcm_example_data\")\n", + "data_folder = parcels.tutorial.download_example_dataset(\"MITgcm_example_data\")\n", "ds_fields = xr.open_dataset(data_folder / \"mitgcm_UV_surface_zonally_reentrant.nc\")" ] }, diff --git a/docs/user_guide/examples/tutorial_nemo.ipynb b/docs/user_guide/examples/tutorial_nemo.ipynb index fd2f50ab6c..5f27d2af65 100644 --- a/docs/user_guide/examples/tutorial_nemo.ipynb +++ b/docs/user_guide/examples/tutorial_nemo.ipynb @@ -66,7 +66,7 @@ "metadata": {}, "outputs": [], "source": [ - "data_folder = parcels.download_example_dataset(\"NemoCurvilinear_data\")\n", + "data_folder = parcels.tutorial.download_example_dataset(\"NemoCurvilinear_data\")\n", "ds_fields = xr.open_mfdataset(\n", " data_folder.glob(\"*.nc4\"),\n", " data_vars=\"minimal\",\n", @@ -291,7 +291,7 @@ "metadata": {}, "outputs": [], "source": [ - "data_folder = parcels.download_example_dataset(\"NemoNorthSeaORCA025-N006_data\")\n", + "data_folder = parcels.tutorial.download_example_dataset(\"NemoNorthSeaORCA025-N006_data\")\n", "ds_fields = xr.open_mfdataset(\n", " data_folder.glob(\"ORCA*.nc\"),\n", " data_vars=\"minimal\",\n", diff --git a/docs/user_guide/examples/tutorial_sampling.ipynb b/docs/user_guide/examples/tutorial_sampling.ipynb index 27ff5af9de..1867a5b1ca 100644 --- a/docs/user_guide/examples/tutorial_sampling.ipynb +++ b/docs/user_guide/examples/tutorial_sampling.ipynb @@ -59,7 +59,7 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", + "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", " \"CopernicusMarine_data_for_Argo_tutorial\"\n", ")\n", "\n", diff --git a/src/parcels/__init__.py b/src/parcels/__init__.py index 7d12abf5ed..5330b6212a 100644 --- a/src/parcels/__init__.py +++ 
b/src/parcels/__init__.py @@ -38,7 +38,6 @@ ParticleSetWarning, ) from parcels._logger import logger -from parcels.tutorial import download_example_dataset, list_example_datasets __all__ = [ # noqa: RUF022 # Core classes diff --git a/tests/test_advection.py b/tests/test_advection.py index c5d6a9ebf4..4898838fa9 100644 --- a/tests/test_advection.py +++ b/tests/test_advection.py @@ -3,6 +3,7 @@ import xarray as xr import parcels +import parcels.tutorial from parcels import ( Field, FieldSet, @@ -455,7 +456,7 @@ def UpdateP(particles, fieldset): # pragma: no cover def test_nemo_curvilinear_fieldset(): - data_folder = parcels.download_example_dataset("NemoCurvilinear_data") + data_folder = parcels.tutorial.download_example_dataset("NemoCurvilinear_data") U = xr.open_mfdataset(data_folder.glob("*U.nc4")) V = xr.open_mfdataset(data_folder.glob("*V.nc4")) coords = xr.open_dataset(data_folder / "mesh_mask.nc4") @@ -476,7 +477,7 @@ def test_nemo_curvilinear_fieldset(): @pytest.mark.parametrize("kernel", [AdvectionRK4, AdvectionRK4_3D]) def test_nemo_3D_curvilinear_fieldset(kernel): - data_folder = parcels.download_example_dataset("NemoNorthSeaORCA025-N006_data") + data_folder = parcels.tutorial.download_example_dataset("NemoNorthSeaORCA025-N006_data") U = xr.open_mfdataset(data_folder.glob("*U.nc")) V = xr.open_mfdataset(data_folder.glob("*V.nc")) W = xr.open_mfdataset(data_folder.glob("*W.nc")) @@ -505,7 +506,7 @@ def test_nemo_3D_curvilinear_fieldset(kernel): def test_mitgcm(): - data_folder = parcels.download_example_dataset("MITgcm_example_data") + data_folder = parcels.tutorial.download_example_dataset("MITgcm_example_data") ds_fields = xr.open_dataset(data_folder / "mitgcm_UV_surface_zonally_reentrant.nc") ds_fset = convert.mitgcm_to_sgrid(fields={"U": ds_fields.UVEL, "V": ds_fields.VVEL}, coords=ds_fields) diff --git a/tests/test_convert.py b/tests/test_convert.py index 15bac3cfbf..17824c7718 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -3,6 +3,7 
@@ import parcels import parcels.convert as convert +import parcels.tutorial from parcels import FieldSet from parcels._core.utils import sgrid from parcels._datasets.structured.circulation_models import datasets as datasets_circulation_models @@ -10,7 +11,7 @@ def test_nemo_to_sgrid(): - data_folder = parcels.download_example_dataset("NemoCurvilinear_data") + data_folder = parcels.tutorial.download_example_dataset("NemoCurvilinear_data") U = xr.open_mfdataset(data_folder.glob("*U.nc4")) V = xr.open_mfdataset(data_folder.glob("*V.nc4")) coords = xr.open_dataset(data_folder / "mesh_mask.nc4") @@ -41,7 +42,7 @@ def test_nemo_to_sgrid(): def test_convert_nemo_offsets(): - data_folder = parcels.download_example_dataset("NemoCurvilinear_data") + data_folder = parcels.tutorial.download_example_dataset("NemoCurvilinear_data") U = xr.open_mfdataset(data_folder.glob("*U.nc4")) V = xr.open_mfdataset(data_folder.glob("*V.nc4")) coords = xr.open_dataset(data_folder / "mesh_mask.nc4") @@ -56,7 +57,7 @@ def test_convert_nemo_offsets(): def test_convert_mitgcm_offsets(): - data_folder = parcels.download_example_dataset("MITgcm_example_data") + data_folder = parcels.tutorial.download_example_dataset("MITgcm_example_data") ds_fields = xr.open_dataset(data_folder / "mitgcm_UV_surface_zonally_reentrant.nc") coords = ds_fields[["XG", "YG", "Zl", "time"]] ds_fset = convert.mitgcm_to_sgrid(fields={"U": ds_fields.UVEL, "V": ds_fields.VVEL}, coords=coords) diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index d642a544c7..43b4afc03a 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -8,6 +8,7 @@ import xarray as xr from zarr.storage import MemoryStore +import parcels.tutorial from parcels import ( Field, FieldSet, @@ -18,7 +19,6 @@ Variable, VectorField, XGrid, - download_example_dataset, ) from parcels._core.particle import Particle, create_particle_data, get_default_particle from parcels._core.utils.time import TimeInterval, 
timedelta_to_float @@ -442,7 +442,7 @@ def test_pset_execute_outputdt_backwards_fieldset_timevarying(): dt = -timedelta(minutes=5) # TODO: Not ideal using the `download_example_dataset` here, but I'm struggling to recreate this error using the test suite fieldsets we have - example_dataset_folder = download_example_dataset("CopernicusMarine_data_for_Argo_tutorial") + example_dataset_folder = parcels.tutorial.download_example_dataset("CopernicusMarine_data_for_Argo_tutorial") ds_in = xr.open_mfdataset(f"{example_dataset_folder}/*.nc", combine="by_coords") fields = {"U": ds_in["uo"], "V": ds_in["vo"]} ds_fset = copernicusmarine_to_sgrid(fields=fields) diff --git a/tests/test_sigmagrids.py b/tests/test_sigmagrids.py index de437c8fba..afedf9de1f 100644 --- a/tests/test_sigmagrids.py +++ b/tests/test_sigmagrids.py @@ -2,6 +2,7 @@ import xarray as xr import parcels +import parcels.tutorial from parcels import Particle, ParticleSet, Variable from parcels.kernels import AdvectionRK4_3D_CROCO, SampleOmegaCroco, convert_z_to_sigma_croco @@ -17,7 +18,7 @@ def test_conversion_3DCROCO(): lat, lon = ds.y_rho.values[y, x], ds.x_rho.values[y, x] ``` """ - data_folder = parcels.download_example_dataset("CROCOidealized_data") + data_folder = parcels.tutorial.download_example_dataset("CROCOidealized_data") ds_fields = xr.open_dataset(data_folder / "CROCO_idealized.nc") fields = { "U": ds_fields["u"], @@ -46,7 +47,7 @@ def test_conversion_3DCROCO(): def test_advection_3DCROCO(): - data_folder = parcels.download_example_dataset("CROCOidealized_data") + data_folder = parcels.tutorial.download_example_dataset("CROCOidealized_data") ds_fields = xr.open_dataset(data_folder / "CROCO_idealized.nc") ds_fields.load() diff --git a/tests/test_uxarray_fieldset.py b/tests/test_uxarray_fieldset.py index 001c411c6c..d4a11f550b 100644 --- a/tests/test_uxarray_fieldset.py +++ b/tests/test_uxarray_fieldset.py @@ -2,6 +2,7 @@ import pytest import uxarray as ux +import parcels.tutorial from parcels 
import ( Field, FieldSet, @@ -9,7 +10,6 @@ ParticleSet, UxGrid, VectorField, - download_example_dataset, ) from parcels._datasets.unstructured.generic import datasets as datasets_unstructured from parcels.convert import fesom_to_ugrid, icon_to_ugrid @@ -22,7 +22,7 @@ @pytest.fixture def ds_fesom_channel() -> ux.UxDataset: - fesom_path = download_example_dataset("FESOM_periodic_channel") + fesom_path = parcels.tutorial.download_example_dataset("FESOM_periodic_channel") grid_path = f"{fesom_path}/fesom_channel.nc" data_path = [ f"{fesom_path}/u.fesom_channel.nc", From b97918da2a1ab07a8bf43b99da4be29e7305f5f2 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 10 Apr 2026 12:09:16 +0200 Subject: [PATCH 09/42] debug: Add code_path argument to --- src/parcels/tutorial.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index 56ff0531bd..2a573f667a 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -1,6 +1,7 @@ import os from datetime import datetime, timedelta from pathlib import Path +from typing import Literal import pooch import xarray as xr @@ -231,7 +232,7 @@ def download_example_dataset(dataset: str, data_home=None): _TMP_ZARR_FOLDER = Path("../parcels-data/data-zarr") -def open_dataset(name: str): +def open_dataset(name: str, code_path: Literal["nc", "zarr"] = "nc"): # TODO: Remove code_path arg if name not in _DATASET_KEYS_AND_OPEN_PATHS: raise ValueError( f"Dataset {name!r} not found. 
Available datasets are: " + ", ".join(list_example_datasets(v4=True)) @@ -244,6 +245,8 @@ def open_dataset(name: str): with xr.set_options(use_new_combine_kwarg_defaults=True): ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs) + if code_path == "nc": + return ds path = _TMP_ZARR_FOLDER / f"{name}.zip" path.parent.mkdir(exist_ok=True) if not path.exists(): From 11daa887fccc6682f3ae6287c01e1c775d644a39 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 10 Apr 2026 14:09:15 +0200 Subject: [PATCH 10/42] Update docs and tests to use open_dataset --- tests/test_advection.py | 19 ++++++++----------- tests/test_convert.py | 17 +++++++---------- tests/test_index_search.py | 8 ++------ tests/test_particlefile.py | 5 ++--- tests/test_sigmagrids.py | 7 ++----- 5 files changed, 21 insertions(+), 35 deletions(-) diff --git a/tests/test_advection.py b/tests/test_advection.py index 4898838fa9..05450a4a93 100644 --- a/tests/test_advection.py +++ b/tests/test_advection.py @@ -456,10 +456,9 @@ def UpdateP(particles, fieldset): # pragma: no cover def test_nemo_curvilinear_fieldset(): - data_folder = parcels.tutorial.download_example_dataset("NemoCurvilinear_data") - U = xr.open_mfdataset(data_folder.glob("*U.nc4")) - V = xr.open_mfdataset(data_folder.glob("*V.nc4")) - coords = xr.open_dataset(data_folder / "mesh_mask.nc4") + U = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/U") + V = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/V") + coords = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask") ds = parcels.convert.nemo_to_sgrid(fields=dict(U=U, V=V), coords=coords) @@ -477,11 +476,10 @@ def test_nemo_curvilinear_fieldset(): @pytest.mark.parametrize("kernel", [AdvectionRK4, AdvectionRK4_3D]) def test_nemo_3D_curvilinear_fieldset(kernel): - data_folder = parcels.tutorial.download_example_dataset("NemoNorthSeaORCA025-N006_data") - U = xr.open_mfdataset(data_folder.glob("*U.nc")) - V 
= xr.open_mfdataset(data_folder.glob("*V.nc")) - W = xr.open_mfdataset(data_folder.glob("*W.nc")) - coords = xr.open_dataset(data_folder / "coordinates.nc", decode_times=False) + U = parcels.tutorial.open_dataset("NemoNorthSeaORCA025-N006_data/U") + V = parcels.tutorial.open_dataset("NemoNorthSeaORCA025-N006_data/V") + W = parcels.tutorial.open_dataset("NemoNorthSeaORCA025-N006_data/W") + coords = parcels.tutorial.open_dataset("NemoNorthSeaORCA025-N006_data/mesh_mask") ds = parcels.convert.nemo_to_sgrid(fields=dict(U=U["uo"], V=V["vo"], W=W["wo"]), coords=coords) @@ -506,8 +504,7 @@ def test_nemo_3D_curvilinear_fieldset(kernel): def test_mitgcm(): - data_folder = parcels.tutorial.download_example_dataset("MITgcm_example_data") - ds_fields = xr.open_dataset(data_folder / "mitgcm_UV_surface_zonally_reentrant.nc") + ds_fields = parcels.tutorial.open_dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant") ds_fset = convert.mitgcm_to_sgrid(fields={"U": ds_fields.UVEL, "V": ds_fields.VVEL}, coords=ds_fields) fieldset = FieldSet.from_sgrid_conventions(ds_fset) diff --git a/tests/test_convert.py b/tests/test_convert.py index 17824c7718..b286bb2689 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -11,10 +11,9 @@ def test_nemo_to_sgrid(): - data_folder = parcels.tutorial.download_example_dataset("NemoCurvilinear_data") - U = xr.open_mfdataset(data_folder.glob("*U.nc4")) - V = xr.open_mfdataset(data_folder.glob("*V.nc4")) - coords = xr.open_dataset(data_folder / "mesh_mask.nc4") + U = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/U") + V = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/V") + coords = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask") ds = convert.nemo_to_sgrid(fields=dict(U=U, V=V), coords=coords) @@ -42,10 +41,9 @@ def test_nemo_to_sgrid(): def test_convert_nemo_offsets(): - data_folder = parcels.tutorial.download_example_dataset("NemoCurvilinear_data") - U = 
xr.open_mfdataset(data_folder.glob("*U.nc4")) - V = xr.open_mfdataset(data_folder.glob("*V.nc4")) - coords = xr.open_dataset(data_folder / "mesh_mask.nc4") + U = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/U") + V = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/V") + coords = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask") ds = convert.nemo_to_sgrid(fields=dict(U=U, V=V), coords=coords) fieldset = FieldSet.from_sgrid_conventions(ds) @@ -57,8 +55,7 @@ def test_convert_nemo_offsets(): def test_convert_mitgcm_offsets(): - data_folder = parcels.tutorial.download_example_dataset("MITgcm_example_data") - ds_fields = xr.open_dataset(data_folder / "mitgcm_UV_surface_zonally_reentrant.nc") + ds_fields = parcels.tutorial.open_dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant") coords = ds_fields[["XG", "YG", "Zl", "time"]] ds_fset = convert.mitgcm_to_sgrid(fields={"U": ds_fields.UVEL, "V": ds_fields.VVEL}, coords=coords) fieldset = FieldSet.from_sgrid_conventions(ds_fset) diff --git a/tests/test_index_search.py b/tests/test_index_search.py index 27e8ba04c2..7d6eb45bac 100644 --- a/tests/test_index_search.py +++ b/tests/test_index_search.py @@ -1,13 +1,12 @@ import numpy as np import pytest -import xarray as xr import xgcm +import parcels.tutorial from parcels import Field, XGrid from parcels._core.index_search import _search_indices_curvilinear_2d from parcels._datasets.structured.generic import datasets from parcels.interpolators import XLinear -from parcels.tutorial import download_example_dataset @pytest.fixture @@ -56,10 +55,7 @@ def test_grid_indexing_fpoints(field_cone): def test_indexing_nemo_curvilinear(): - data_folder = download_example_dataset("NemoCurvilinear_data") - ds = xr.open_mfdataset( - data_folder.glob("*.nc4"), combine="nested", data_vars="minimal", coords="minimal", compat="override" - ) + ds = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask") ds = 
ds.isel({"time_counter": 0, "time": 0, "z_a": 0}, drop=True).rename( {"glamf": "lon", "gphif": "lat", "z": "depth"} ) diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index 43b4afc03a..ef4d1b565b 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -441,9 +441,8 @@ def test_pset_execute_outputdt_backwards_fieldset_timevarying(): runtime = timedelta(days=2) dt = -timedelta(minutes=5) - # TODO: Not ideal using the `download_example_dataset` here, but I'm struggling to recreate this error using the test suite fieldsets we have - example_dataset_folder = parcels.tutorial.download_example_dataset("CopernicusMarine_data_for_Argo_tutorial") - ds_in = xr.open_mfdataset(f"{example_dataset_folder}/*.nc", combine="by_coords") + # TODO: Not ideal using the `open_dataset` here, but I'm struggling to recreate this error using the test suite fieldsets we have + ds_in = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc") fields = {"U": ds_in["uo"], "V": ds_in["vo"]} ds_fset = copernicusmarine_to_sgrid(fields=fields) fieldset = FieldSet.from_sgrid_conventions(ds_fset) diff --git a/tests/test_sigmagrids.py b/tests/test_sigmagrids.py index afedf9de1f..537c8c101d 100644 --- a/tests/test_sigmagrids.py +++ b/tests/test_sigmagrids.py @@ -1,5 +1,4 @@ import numpy as np -import xarray as xr import parcels import parcels.tutorial @@ -18,8 +17,7 @@ def test_conversion_3DCROCO(): lat, lon = ds.y_rho.values[y, x], ds.x_rho.values[y, x] ``` """ - data_folder = parcels.tutorial.download_example_dataset("CROCOidealized_data") - ds_fields = xr.open_dataset(data_folder / "CROCO_idealized.nc") + ds_fields = parcels.tutorial.open_dataset("CROCOidealized_data/data") fields = { "U": ds_fields["u"], "V": ds_fields["v"], @@ -47,8 +45,7 @@ def test_conversion_3DCROCO(): def test_advection_3DCROCO(): - data_folder = parcels.tutorial.download_example_dataset("CROCOidealized_data") - ds_fields = 
xr.open_dataset(data_folder / "CROCO_idealized.nc") + ds_fields = parcels.tutorial.open_dataset("CROCOidealized_data/data") ds_fields.load() fields = { From 332fcab9f4123cbdbcea4e846c582a2c7b32ebdf Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 10 Apr 2026 14:27:32 +0200 Subject: [PATCH 11/42] Refactor _DATASET_KEYS_AND_OPEN_PATHS to _DATASET_KEYS_AND_CONFIGS --- src/parcels/tutorial.py | 93 +++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 36 deletions(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index 2a573f667a..cc78e2e254 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -1,4 +1,6 @@ import os +from collections.abc import Callable +from dataclasses import dataclass from datetime import datetime, timedelta from pathlib import Path from typing import Literal @@ -108,41 +110,50 @@ "CROCOidealized_data": ["CROCO_idealized.nc"], } + +@dataclass +class DatasetNCtoZarrConfig: + path_relative_to_root: str + + # Function to apply to the dataset before the decoding the CF variables + pre_decode_cf_callable: None | Callable[[xr.Dataset], xr.Dataset] = None + + # The first here is a human readable key, the latter the path to load the netcdf data # (after refactor the latter open path will disappear, and will just be `open_zarr(f'{ds_key}.zip')`) # fmt: off -_DATASET_KEYS_AND_OPEN_PATHS: dict[str, str] = dict([ - ("MovingEddies_data/P", "MovingEddies_data/moving_eddiesP.nc"), - ("MovingEddies_data/U", "MovingEddies_data/moving_eddiesU.nc"), - ("MovingEddies_data/V", "MovingEddies_data/moving_eddiesV.nc"), - ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", "MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"), - ("OFAM_example_data/U", "OFAM_example_data/OFAM_simple_U.nc"), - ("OFAM_example_data/V", "OFAM_example_data/OFAM_simple_V.nc"), - ("Peninsula_data/U", "Peninsula_data/peninsulaU.nc"), - ("Peninsula_data/V", 
"Peninsula_data/peninsulaV.nc"), - ("Peninsula_data/P", "Peninsula_data/peninsulaP.nc"), - ("Peninsula_data/T", "Peninsula_data/peninsulaT.nc"), - ("GlobCurrent_example_data/data.nc", "GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc"), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), - ("DecayingMovingEddy_data/U", "DecayingMovingEddy_data/decaying_moving_eddyU.nc"), - ("DecayingMovingEddy_data/V", "DecayingMovingEddy_data/decaying_moving_eddyV.nc"), - ("FESOM_periodic_channel/fesom_channel", "FESOM_periodic_channel/fesom_channel.nc"), - ("FESOM_periodic_channel/u.fesom_channel", "FESOM_periodic_channel/u.fesom_channel.nc"), - ("FESOM_periodic_channel/v.fesom_channel", "FESOM_periodic_channel/v.fesom_channel.nc"), - ("FESOM_periodic_channel/w.fesom_channel", "FESOM_periodic_channel/w.fesom_channel.nc"), - ("NemoCurvilinear_data_zonal/U", "NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4"), - ("NemoCurvilinear_data_zonal/V", "NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4"), - ("NemoCurvilinear_data_zonal/mesh_mask", "NemoCurvilinear_data/mesh_mask.nc4"), - ("NemoNorthSeaORCA025-N006_data/U", "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc"), - ("NemoNorthSeaORCA025-N006_data/V", "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"), - 
("NemoNorthSeaORCA025-N006_data/W", "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"), - ("NemoNorthSeaORCA025-N006_data/mesh_mask", "NemoNorthSeaORCA025-N006_data/coordinates.nc"), +_DATASET_KEYS_AND_CONFIGS: dict[str, DatasetNCtoZarrConfig] = dict([ + ("MovingEddies_data/P", DatasetNCtoZarrConfig("MovingEddies_data/moving_eddiesP.nc")), + ("MovingEddies_data/U", DatasetNCtoZarrConfig("MovingEddies_data/moving_eddiesU.nc")), + ("MovingEddies_data/V", DatasetNCtoZarrConfig("MovingEddies_data/moving_eddiesV.nc")), + ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", DatasetNCtoZarrConfig("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc")), + ("OFAM_example_data/U", DatasetNCtoZarrConfig("OFAM_example_data/OFAM_simple_U.nc")), + ("OFAM_example_data/V", DatasetNCtoZarrConfig("OFAM_example_data/OFAM_simple_V.nc")), + ("Peninsula_data/U", DatasetNCtoZarrConfig("Peninsula_data/peninsulaU.nc")), + ("Peninsula_data/V", DatasetNCtoZarrConfig("Peninsula_data/peninsulaV.nc")), + ("Peninsula_data/P", DatasetNCtoZarrConfig("Peninsula_data/peninsulaP.nc")), + ("Peninsula_data/T", DatasetNCtoZarrConfig("Peninsula_data/peninsulaT.nc")), + ("GlobCurrent_example_data/data.nc", DatasetNCtoZarrConfig("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc")), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", DatasetNCtoZarrConfig("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", DatasetNCtoZarrConfig("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", 
DatasetNCtoZarrConfig("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), + ("DecayingMovingEddy_data/U", DatasetNCtoZarrConfig("DecayingMovingEddy_data/decaying_moving_eddyU.nc")), + ("DecayingMovingEddy_data/V", DatasetNCtoZarrConfig("DecayingMovingEddy_data/decaying_moving_eddyV.nc")), + ("FESOM_periodic_channel/fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/fesom_channel.nc")), + ("FESOM_periodic_channel/u.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/u.fesom_channel.nc")), + ("FESOM_periodic_channel/v.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/v.fesom_channel.nc")), + ("FESOM_periodic_channel/w.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/w.fesom_channel.nc")), + ("NemoCurvilinear_data_zonal/U", DatasetNCtoZarrConfig("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4")), + ("NemoCurvilinear_data_zonal/V", DatasetNCtoZarrConfig("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4")), + ("NemoCurvilinear_data_zonal/mesh_mask", DatasetNCtoZarrConfig("NemoCurvilinear_data/mesh_mask.nc4")), + ("NemoNorthSeaORCA025-N006_data/U", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc")), + ("NemoNorthSeaORCA025-N006_data/V", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc")), + ("NemoNorthSeaORCA025-N006_data/W", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc")), + ("NemoNorthSeaORCA025-N006_data/mesh_mask", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/coordinates.nc")), # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973 - ("SWASH_data/data", "SWASH_data/field_00655*.nc"), - ("WOA_data/data", "WOA_data/woa18_decav_t*_04.nc"), - ("CROCOidealized_data/data", 
"CROCOidealized_data/CROCO_idealized.nc"), + ("SWASH_data/data", DatasetNCtoZarrConfig("SWASH_data/field_00655*.nc")), + ("WOA_data/data", DatasetNCtoZarrConfig("WOA_data/woa18_decav_t*_04.nc")), + ("CROCOidealized_data/data", DatasetNCtoZarrConfig("CROCOidealized_data/CROCO_idealized.nc")), ]) # fmt: on @@ -186,8 +197,8 @@ def list_example_datasets(v4=False) -> list[str]: # TODO: Remove v4 flag when m The names of the available example datasets. """ if v4: - return list(_DATASET_KEYS_AND_OPEN_PATHS.keys()) - return list(set(v.split("/")[0] for v in _DATASET_KEYS_AND_OPEN_PATHS.values())) + return list(_DATASET_KEYS_AND_CONFIGS.keys()) + return list(set(v.path_relative_to_root.split("/")[0] for v in _DATASET_KEYS_AND_CONFIGS.values())) def download_example_dataset(dataset: str, data_home=None): @@ -233,18 +244,28 @@ def download_example_dataset(dataset: str, data_home=None): def open_dataset(name: str, code_path: Literal["nc", "zarr"] = "nc"): # TODO: Remove code_path arg - if name not in _DATASET_KEYS_AND_OPEN_PATHS: + try: + cfg = _DATASET_KEYS_AND_CONFIGS[name] + except KeyError as e: raise ValueError( f"Dataset {name!r} not found. 
Available datasets are: " + ", ".join(list_example_datasets(v4=True)) - ) + ) from e open_dataset_kwargs = dict(decode_timedelta=False, decode_cf=False) + open_dataset_kwargs = dict(decode_cf=False) # assert not dataset.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" - download_dataset_stem, rest = _DATASET_KEYS_AND_OPEN_PATHS[name].split("/", maxsplit=1) + download_dataset_stem, rest = cfg.path_relative_to_root.split("/", maxsplit=1) folder = download_example_dataset(download_dataset_stem) with xr.set_options(use_new_combine_kwarg_defaults=True): + # return f"{folder}/{rest}" ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs) + + if cfg.pre_decode_cf_callable is not None: + ds = cfg.pre_decode_cf_callable(ds) + + ds = xr.decode_cf(ds) + if code_path == "nc": return ds path = _TMP_ZARR_FOLDER / f"{name}.zip" From f8df2386d0aa73f172d11aec41e8f3dabdbcd025 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:26:28 +0200 Subject: [PATCH 12/42] Fix dataset ingestion --- src/parcels/tutorial.py | 23 ++++++++++++++++++++--- tests/test_index_search.py | 4 +--- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index cc78e2e254..e5800d1851 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -119,6 +119,23 @@ class DatasetNCtoZarrConfig: pre_decode_cf_callable: None | Callable[[xr.Dataset], xr.Dataset] = None +def _preprocess_drop_time_from_mesh1(ds: xr.Dataset) -> xr.Dataset: + # For some reason on the mesh "NemoNorthSeaORCA025-N006_data/coordinates.nc" there are time dimensions. These dimension also has broken cf-time metadata + # this fixes that + return ds.isel(time=0).drop(["time", "time_steps"]) + + +def _preprocess_drop_time_from_mesh2(ds: xr.Dataset) -> xr.Dataset: + # For some reason on the mesh "NemoCurvilinear_data_zonal/mesh_mask" there is a time dimension. 
+ return ds.isel(time=0).drop(["time"]) + + +def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset: + # For some reason "WOA_data/woa18_decav_t*_04.nc" looks to be simulation data using CF time (i.e., months of 30 days), however the calendar attribute isn't set. + ds.time.attrs.update({"calendar": "360_day"}) + return ds + + # The first here is a human readable key, the latter the path to load the netcdf data # (after refactor the latter open path will disappear, and will just be `open_zarr(f'{ds_key}.zip')`) # fmt: off @@ -145,14 +162,14 @@ class DatasetNCtoZarrConfig: ("FESOM_periodic_channel/w.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/w.fesom_channel.nc")), ("NemoCurvilinear_data_zonal/U", DatasetNCtoZarrConfig("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4")), ("NemoCurvilinear_data_zonal/V", DatasetNCtoZarrConfig("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4")), - ("NemoCurvilinear_data_zonal/mesh_mask", DatasetNCtoZarrConfig("NemoCurvilinear_data/mesh_mask.nc4")), + ("NemoCurvilinear_data_zonal/mesh_mask", DatasetNCtoZarrConfig("NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2)), ("NemoNorthSeaORCA025-N006_data/U", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc")), ("NemoNorthSeaORCA025-N006_data/V", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc")), ("NemoNorthSeaORCA025-N006_data/W", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc")), - ("NemoNorthSeaORCA025-N006_data/mesh_mask", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/coordinates.nc")), + ("NemoNorthSeaORCA025-N006_data/mesh_mask", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1)), # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973 ("SWASH_data/data", 
DatasetNCtoZarrConfig("SWASH_data/field_00655*.nc")), - ("WOA_data/data", DatasetNCtoZarrConfig("WOA_data/woa18_decav_t*_04.nc")), + ("WOA_data/data", DatasetNCtoZarrConfig("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day)), ("CROCOidealized_data/data", DatasetNCtoZarrConfig("CROCOidealized_data/CROCO_idealized.nc")), ]) # fmt: on diff --git a/tests/test_index_search.py b/tests/test_index_search.py index 7d6eb45bac..bb7ec3f3b1 100644 --- a/tests/test_index_search.py +++ b/tests/test_index_search.py @@ -56,9 +56,7 @@ def test_grid_indexing_fpoints(field_cone): def test_indexing_nemo_curvilinear(): ds = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask") - ds = ds.isel({"time_counter": 0, "time": 0, "z_a": 0}, drop=True).rename( - {"glamf": "lon", "gphif": "lat", "z": "depth"} - ) + ds = ds.isel({"z_a": 0}, drop=True).rename({"glamf": "lon", "gphif": "lat", "z": "depth"}) xgcm_grid = xgcm.Grid(ds, coords={"X": {"left": "x"}, "Y": {"left": "y"}}, periodic=False, autoparse_metadata=False) grid = XGrid(xgcm_grid, mesh="spherical") From 7c212ff78b39134099cd80839b2f141f325ae671 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:31:36 +0200 Subject: [PATCH 13/42] Remove nc code path was used for testing --- src/parcels/tutorial.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index e5800d1851..7cdcc345a4 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -3,7 +3,6 @@ from dataclasses import dataclass from datetime import datetime, timedelta from pathlib import Path -from typing import Literal import pooch import xarray as xr @@ -260,7 +259,7 @@ def download_example_dataset(dataset: str, data_home=None): _TMP_ZARR_FOLDER = Path("../parcels-data/data-zarr") -def open_dataset(name: str, code_path: Literal["nc", "zarr"] = "nc"): # TODO: Remove code_path arg +def open_dataset(name: 
str): # TODO: Remove code_path arg try: cfg = _DATASET_KEYS_AND_CONFIGS[name] except KeyError as e: @@ -268,7 +267,6 @@ def open_dataset(name: str, code_path: Literal["nc", "zarr"] = "nc"): # TODO: R f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_example_datasets(v4=True)) ) from e - open_dataset_kwargs = dict(decode_timedelta=False, decode_cf=False) open_dataset_kwargs = dict(decode_cf=False) # assert not dataset.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" download_dataset_stem, rest = cfg.path_relative_to_root.split("/", maxsplit=1) @@ -283,14 +281,12 @@ def open_dataset(name: str, code_path: Literal["nc", "zarr"] = "nc"): # TODO: R ds = xr.decode_cf(ds) - if code_path == "nc": - return ds path = _TMP_ZARR_FOLDER / f"{name}.zip" - path.parent.mkdir(exist_ok=True) + path.parent.mkdir(exist_ok=True, parents=True) if not path.exists(): with zarr.storage.ZipStore(path, mode="w") as store: ds.to_zarr(store) - return xr.open_zarr(path, **open_dataset_kwargs) + return xr.open_zarr(path) def _v4_compat_patch(fname, action, pup): From fdccf31e99b1916bdff323a79b6c2532525504bd Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:36:34 +0200 Subject: [PATCH 14/42] Cleanup --- src/parcels/tutorial.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index 7cdcc345a4..e78d4b8629 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -268,12 +268,11 @@ def open_dataset(name: str): # TODO: Remove code_path arg ) from e open_dataset_kwargs = dict(decode_cf=False) - # assert not dataset.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" + assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" download_dataset_stem, rest = cfg.path_relative_to_root.split("/", maxsplit=1) folder = download_example_dataset(download_dataset_stem) 
with xr.set_options(use_new_combine_kwarg_defaults=True): - # return f"{folder}/{rest}" ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs) if cfg.pre_decode_cf_callable is not None: From 788313d22c5c10467e9ac807f2acd9bd2fdf42ee Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 09:40:26 +0200 Subject: [PATCH 15/42] Lift constant definition --- src/parcels/tutorial.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index e78d4b8629..b4c297628a 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -18,6 +18,11 @@ _DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{_DATA_REPO_TAG}/data" +DATA_HOME = os.environ.get("PARCELS_EXAMPLE_DATA") +if DATA_HOME is None: + DATA_HOME = pooch.os_cache("parcels") + + # Keys are the dataset names. Values are the filenames in the dataset folder. Note that # you can specify subfolders in the dataset folder putting slashes in the filename list. 
# e.g., @@ -191,10 +196,7 @@ def _create_pooch_registry() -> dict[str, None]: def _get_pooch(data_home=None): if data_home is None: - data_home = os.environ.get("PARCELS_EXAMPLE_DATA") - if data_home is None: - data_home = pooch.os_cache("parcels") - + data_home = DATA_HOME return pooch.create( path=data_home, base_url=_DATA_URL, From 110f5d656dc017059ead924b5cdef99c15e4db77 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 09:46:10 +0200 Subject: [PATCH 16/42] Remove data_home option from download_dataset --- src/parcels/tutorial.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index b4c297628a..d362c21bb3 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -194,11 +194,9 @@ def _create_pooch_registry() -> dict[str, None]: POOCH_REGISTRY = _create_pooch_registry() -def _get_pooch(data_home=None): - if data_home is None: - data_home = DATA_HOME +def _get_pooch(): return pooch.create( - path=data_home, + path=DATA_HOME, base_url=_DATA_URL, registry=POOCH_REGISTRY, ) @@ -219,20 +217,18 @@ def list_example_datasets(v4=False) -> list[str]: # TODO: Remove v4 flag when m return list(set(v.path_relative_to_root.split("/")[0] for v in _DATASET_KEYS_AND_CONFIGS.values())) -def download_example_dataset(dataset: str, data_home=None): +def download_example_dataset(dataset: str): """Load an example dataset from the parcels website. This function provides quick access to a small number of example datasets that are useful in documentation and testing in parcels. + The location where the data is downloaded can be set using the environment variable PARCELS_EXAMPLE_DATA . + Parameters ---------- dataset : str Name of the dataset to load. - data_home : pathlike, optional - The directory in which to cache data. If not specified, the value - of the ``PARCELS_EXAMPLE_DATA`` environment variable, if any, is used. 
- Otherwise the default location is assigned by :func:`get_data_home`. Returns ------- @@ -244,7 +240,7 @@ def download_example_dataset(dataset: str, data_home=None): raise ValueError( f"Dataset {dataset!r} not found. Available datasets are: " + ", ".join(_EXAMPLE_DATA_FILES.keys()) ) - odie = _get_pooch(data_home=data_home) + odie = _get_pooch() cache_folder = Path(odie.path) dataset_folder = cache_folder / dataset From da3975d8ff3b84b7cacea4295ac52a1eade15612 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 09:48:58 +0200 Subject: [PATCH 17/42] Make pooch instance a constant --- src/parcels/tutorial.py | 28 ++++++++++++---------------- tests/test_tutorial.py | 4 ++-- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index d362c21bb3..ebe33da28c 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -18,9 +18,9 @@ _DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{_DATA_REPO_TAG}/data" -DATA_HOME = os.environ.get("PARCELS_EXAMPLE_DATA") -if DATA_HOME is None: - DATA_HOME = pooch.os_cache("parcels") +_DATA_HOME = os.environ.get("PARCELS_EXAMPLE_DATA") +if _DATA_HOME is None: + _DATA_HOME = pooch.os_cache("parcels") # Keys are the dataset names. Values are the filenames in the dataset folder. Note that @@ -191,15 +191,12 @@ def _create_pooch_registry() -> dict[str, None]: return registry -POOCH_REGISTRY = _create_pooch_registry() - - -def _get_pooch(): - return pooch.create( - path=DATA_HOME, - base_url=_DATA_URL, - registry=POOCH_REGISTRY, - ) +_POOCH_REGISTRY = _create_pooch_registry() +_ODIE = pooch.create( + path=_DATA_HOME, + base_url=_DATA_URL, + registry=_POOCH_REGISTRY, +) def list_example_datasets(v4=False) -> list[str]: # TODO: Remove v4 flag when migrating to open_dataset @@ -240,15 +237,14 @@ def download_example_dataset(dataset: str): raise ValueError( f"Dataset {dataset!r} not found. 
Available datasets are: " + ", ".join(_EXAMPLE_DATA_FILES.keys()) ) - odie = _get_pooch() - cache_folder = Path(odie.path) + cache_folder = Path(_ODIE.path) dataset_folder = cache_folder / dataset - for file_name in odie.registry: + for file_name in _ODIE.registry: if file_name.startswith(dataset): should_patch = dataset == "GlobCurrent_example_data" - odie.fetch(file_name, processor=_v4_compat_patch if should_patch else None) + _ODIE.fetch(file_name, processor=_v4_compat_patch if should_patch else None) return dataset_folder diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py index 4ab1636c2b..9ae0b88aa7 100644 --- a/tests/test_tutorial.py +++ b/tests/test_tutorial.py @@ -3,14 +3,14 @@ import xarray as xr from parcels.tutorial import ( - _get_pooch, + _ODIE, download_example_dataset, list_example_datasets, open_dataset, ) -@pytest.mark.parametrize("url", [_get_pooch().get_url(filename) for filename in _get_pooch().registry.keys()]) +@pytest.mark.parametrize("url", [_ODIE.get_url(filename) for filename in _ODIE.registry.keys()]) def test_pooch_registry_url_reponse(url): response = requests.head(url) assert not (400 <= response.status_code < 600) From ee23d52e44951c0fd1349bb378be2bde461fb030 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 09:59:26 +0200 Subject: [PATCH 18/42] Fix test failures --- tests/test_tutorial.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py index 9ae0b88aa7..a958949514 100644 --- a/tests/test_tutorial.py +++ b/tests/test_tutorial.py @@ -10,6 +10,12 @@ ) +@pytest.fixture(scope="function") +def tmp_path_parcels_example_data(monkeypatch, tmp_path): + monkeypatch.setenv("PARCELS_EXAMPLE_DATA", str(tmp_path)) + return tmp_path + + @pytest.mark.parametrize("url", [_ODIE.get_url(filename) for filename in _ODIE.registry.keys()]) def test_pooch_registry_url_reponse(url): response = 
requests.head(url) @@ -17,17 +23,16 @@ def test_pooch_registry_url_reponse(url): @pytest.mark.parametrize("dataset", list_example_datasets()[:1]) -def test_download_example_dataset_folder_creation(tmp_path, dataset): - dataset_folder_path = download_example_dataset(dataset, data_home=tmp_path) +def test_download_example_dataset_folder_creation(dataset): + dataset_folder_path = download_example_dataset(dataset) assert dataset_folder_path.exists() assert dataset_folder_path.name == dataset - assert dataset_folder_path.parent == tmp_path -def test_download_non_existing_example_dataset(tmp_path): +def test_download_non_existing_example_dataset(tmp_path_parcels_example_data): with pytest.raises(ValueError): - download_example_dataset("non_existing_dataset", data_home=tmp_path) + download_example_dataset("non_existing_dataset") def test_download_example_dataset_no_data_home(): From 7c67e89a11db948dc6665d8beb0a03aa0e67bb79 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 11:23:17 +0200 Subject: [PATCH 19/42] Refactor --- src/parcels/tutorial.py | 180 ++++++++++++++++++++++------------------ 1 file changed, 97 insertions(+), 83 deletions(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index ebe33da28c..d092d9bd0e 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -1,12 +1,11 @@ +import abc import os from collections.abc import Callable -from dataclasses import dataclass from datetime import datetime, timedelta from pathlib import Path import pooch import xarray as xr -import zarr from parcels._v3to4 import patch_dataset_v4_compat @@ -115,12 +114,66 @@ } -@dataclass -class DatasetNCtoZarrConfig: - path_relative_to_root: str +def _create_pooch_registry() -> dict[str, None]: + """Collapses the mapping of dataset names to filenames into a pooch registry. + + Hashes are set to None for all files. 
+ """ + registry: dict[str, None] = {} + for dataset, filenames in _EXAMPLE_DATA_FILES.items(): + for filename in filenames: + registry[f"{dataset}/{filename}"] = None + return registry + + +_POOCH_REGISTRY = _create_pooch_registry() +_ODIE = pooch.create( + path=_DATA_HOME, + base_url=_DATA_URL, + registry=_POOCH_REGISTRY, +) + + +class _ParcelsDataset(abc.ABC): + @abc.abstractmethod + def open_dataset(self) -> xr.Dataset: ... + + +class _V3Dataset(_ParcelsDataset): + def __init__(self, path_relative_to_root: str, pre_decode_cf_callable=None): + self.path_relative_to_root = path_relative_to_root # glob is allowed + + # Function to apply to the dataset before the decoding the CF variables + self.pup = _ODIE + self.pre_decode_cf_callable: None | Callable[[xr.Dataset], xr.Dataset] = pre_decode_cf_callable + self.v3_dataset_name = path_relative_to_root.split("/")[0] + + def open_dataset(self) -> xr.Dataset: + self.download_relevant_files() + with xr.set_options(use_new_combine_kwarg_defaults=True): + ds = xr.open_mfdataset(Path(self.pup.path) / self.path_relative_to_root, decode_cf=False) + + if self.pre_decode_cf_callable is not None: + ds = self.pre_decode_cf_callable(ds) + + ds = xr.decode_cf(ds) + return ds + + def download_relevant_files(self) -> None: + for file in self.pup.registry: + if self.v3_dataset_name in file: + self.pup.fetch(file) + return + - # Function to apply to the dataset before the decoding the CF variables - pre_decode_cf_callable: None | Callable[[xr.Dataset], xr.Dataset] = None +class _ZarrZipDataset(_ParcelsDataset): + def __init__(self, path_relative_to_root): + self.pup = _ODIE + self.path_relative_to_root = path_relative_to_root + + def open_dataset(self) -> xr.Dataset: + self.pup.fetch(self.path_relative_to_root) + return xr.open_zarr(Path(self.pup.path) / self.path_relative_to_root) def _preprocess_drop_time_from_mesh1(ds: xr.Dataset) -> xr.Dataset: @@ -143,62 +196,42 @@ def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> 
xr.Dataset: # The first here is a human readable key, the latter the path to load the netcdf data # (after refactor the latter open path will disappear, and will just be `open_zarr(f'{ds_key}.zip')`) # fmt: off -_DATASET_KEYS_AND_CONFIGS: dict[str, DatasetNCtoZarrConfig] = dict([ - ("MovingEddies_data/P", DatasetNCtoZarrConfig("MovingEddies_data/moving_eddiesP.nc")), - ("MovingEddies_data/U", DatasetNCtoZarrConfig("MovingEddies_data/moving_eddiesU.nc")), - ("MovingEddies_data/V", DatasetNCtoZarrConfig("MovingEddies_data/moving_eddiesV.nc")), - ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", DatasetNCtoZarrConfig("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc")), - ("OFAM_example_data/U", DatasetNCtoZarrConfig("OFAM_example_data/OFAM_simple_U.nc")), - ("OFAM_example_data/V", DatasetNCtoZarrConfig("OFAM_example_data/OFAM_simple_V.nc")), - ("Peninsula_data/U", DatasetNCtoZarrConfig("Peninsula_data/peninsulaU.nc")), - ("Peninsula_data/V", DatasetNCtoZarrConfig("Peninsula_data/peninsulaV.nc")), - ("Peninsula_data/P", DatasetNCtoZarrConfig("Peninsula_data/peninsulaP.nc")), - ("Peninsula_data/T", DatasetNCtoZarrConfig("Peninsula_data/peninsulaT.nc")), - ("GlobCurrent_example_data/data.nc", DatasetNCtoZarrConfig("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc")), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", DatasetNCtoZarrConfig("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", DatasetNCtoZarrConfig("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", 
DatasetNCtoZarrConfig("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), - ("DecayingMovingEddy_data/U", DatasetNCtoZarrConfig("DecayingMovingEddy_data/decaying_moving_eddyU.nc")), - ("DecayingMovingEddy_data/V", DatasetNCtoZarrConfig("DecayingMovingEddy_data/decaying_moving_eddyV.nc")), - ("FESOM_periodic_channel/fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/fesom_channel.nc")), - ("FESOM_periodic_channel/u.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/u.fesom_channel.nc")), - ("FESOM_periodic_channel/v.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/v.fesom_channel.nc")), - ("FESOM_periodic_channel/w.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/w.fesom_channel.nc")), - ("NemoCurvilinear_data_zonal/U", DatasetNCtoZarrConfig("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4")), - ("NemoCurvilinear_data_zonal/V", DatasetNCtoZarrConfig("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4")), - ("NemoCurvilinear_data_zonal/mesh_mask", DatasetNCtoZarrConfig("NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2)), - ("NemoNorthSeaORCA025-N006_data/U", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc")), - ("NemoNorthSeaORCA025-N006_data/V", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc")), - ("NemoNorthSeaORCA025-N006_data/W", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc")), - ("NemoNorthSeaORCA025-N006_data/mesh_mask", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1)), +_DATASET_KEYS_AND_CONFIGS: dict[str, _V3Dataset] = dict([ + ("MovingEddies_data/P", _V3Dataset("MovingEddies_data/moving_eddiesP.nc")), + ("MovingEddies_data/U", _V3Dataset("MovingEddies_data/moving_eddiesU.nc")), + ("MovingEddies_data/V", 
_V3Dataset("MovingEddies_data/moving_eddiesV.nc")), + ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", _V3Dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc")), + ("OFAM_example_data/U", _V3Dataset("OFAM_example_data/OFAM_simple_U.nc")), + ("OFAM_example_data/V", _V3Dataset("OFAM_example_data/OFAM_simple_V.nc")), + ("Peninsula_data/U", _V3Dataset("Peninsula_data/peninsulaU.nc")), + ("Peninsula_data/V", _V3Dataset("Peninsula_data/peninsulaV.nc")), + ("Peninsula_data/P", _V3Dataset("Peninsula_data/peninsulaP.nc")), + ("Peninsula_data/T", _V3Dataset("Peninsula_data/peninsulaT.nc")), + ("GlobCurrent_example_data/data", _V3Dataset("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc")), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), + ("DecayingMovingEddy_data/U", _V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyU.nc")), + ("DecayingMovingEddy_data/V", _V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyV.nc")), + ("FESOM_periodic_channel/fesom_channel", _V3Dataset("FESOM_periodic_channel/fesom_channel.nc")), + ("FESOM_periodic_channel/u.fesom_channel", _V3Dataset("FESOM_periodic_channel/u.fesom_channel.nc")), + ("FESOM_periodic_channel/v.fesom_channel", _V3Dataset("FESOM_periodic_channel/v.fesom_channel.nc")), + 
("FESOM_periodic_channel/w.fesom_channel", _V3Dataset("FESOM_periodic_channel/w.fesom_channel.nc")), + ("NemoCurvilinear_data_zonal/U", _V3Dataset("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4")), + ("NemoCurvilinear_data_zonal/V", _V3Dataset("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4")), + ("NemoCurvilinear_data_zonal/mesh_mask", _V3Dataset("NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2)), + ("NemoNorthSeaORCA025-N006_data/U", _V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc")), + ("NemoNorthSeaORCA025-N006_data/V", _V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc")), + ("NemoNorthSeaORCA025-N006_data/W", _V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc")), + ("NemoNorthSeaORCA025-N006_data/mesh_mask", _V3Dataset("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1)), # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973 - ("SWASH_data/data", DatasetNCtoZarrConfig("SWASH_data/field_00655*.nc")), - ("WOA_data/data", DatasetNCtoZarrConfig("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day)), - ("CROCOidealized_data/data", DatasetNCtoZarrConfig("CROCOidealized_data/CROCO_idealized.nc")), + ("SWASH_data/data", _V3Dataset("SWASH_data/field_00655*.nc")), + ("WOA_data/data", _V3Dataset("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day)), + ("CROCOidealized_data/data", _V3Dataset("CROCOidealized_data/CROCO_idealized.nc")), ]) # fmt: on -def _create_pooch_registry() -> dict[str, None]: - """Collapses the mapping of dataset names to filenames into a pooch registry. - - Hashes are set to None for all files. 
- """ - registry: dict[str, None] = {} - for dataset, filenames in _EXAMPLE_DATA_FILES.items(): - for filename in filenames: - registry[f"{dataset}/{filename}"] = None - return registry - - -_POOCH_REGISTRY = _create_pooch_registry() -_ODIE = pooch.create( - path=_DATA_HOME, - base_url=_DATA_URL, - registry=_POOCH_REGISTRY, -) - - def list_example_datasets(v4=False) -> list[str]: # TODO: Remove v4 flag when migrating to open_dataset """List the available example datasets. @@ -249,37 +282,18 @@ def download_example_dataset(dataset: str): return dataset_folder -# Just creating a temp folder to help during the migration -_TMP_ZARR_FOLDER = Path("../parcels-data/data-zarr") - - -def open_dataset(name: str): # TODO: Remove code_path arg +def open_dataset(name: str): try: - cfg = _DATASET_KEYS_AND_CONFIGS[name] + dataset_config = _DATASET_KEYS_AND_CONFIGS[name] except KeyError as e: raise ValueError( f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_example_datasets(v4=True)) ) from e + assert not name.endswith((".zarr", ".zip", ".nc")), ( + "Dataset name should not have suffix" + ) # TODO: Move to test_tutorial - open_dataset_kwargs = dict(decode_cf=False) - assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" - download_dataset_stem, rest = cfg.path_relative_to_root.split("/", maxsplit=1) - folder = download_example_dataset(download_dataset_stem) - - with xr.set_options(use_new_combine_kwarg_defaults=True): - ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs) - - if cfg.pre_decode_cf_callable is not None: - ds = cfg.pre_decode_cf_callable(ds) - - ds = xr.decode_cf(ds) - - path = _TMP_ZARR_FOLDER / f"{name}.zip" - path.parent.mkdir(exist_ok=True, parents=True) - if not path.exists(): - with zarr.storage.ZipStore(path, mode="w") as store: - ds.to_zarr(store) - return xr.open_zarr(path) + return dataset_config.open_dataset() def _v4_compat_patch(fname, action, pup): From 
89d48c97c08d481e1f185924443b9908c6af1530 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 11:50:30 +0200 Subject: [PATCH 20/42] Refactor and remove legacy tooling --- src/parcels/tutorial.py | 237 +++++++++++++++------------------------- tests/test_tutorial.py | 40 ++----- 2 files changed, 97 insertions(+), 180 deletions(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index d092d9bd0e..0a127f5e80 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -9,7 +9,7 @@ from parcels._v3to4 import patch_dataset_v4_compat -__all__ = ["download_example_dataset", "list_example_datasets"] +__all__ = ["list_datasets", "open_dataset"] # When modifying existing datasets in a backwards incompatible way, # make a new release in the repo and update the DATA_REPO_TAG to the new tag @@ -34,99 +34,88 @@ # └── file2.nc # # See instructions at https://github.com/Parcels-code/parcels-data for adding new datasets -_EXAMPLE_DATA_FILES: dict[str, list[str]] = { - "MovingEddies_data": [ - "moving_eddiesP.nc", - "moving_eddiesU.nc", - "moving_eddiesV.nc", - ], - "MITgcm_example_data": ["mitgcm_UV_surface_zonally_reentrant.nc"], - "OFAM_example_data": ["OFAM_simple_U.nc", "OFAM_simple_V.nc"], - "Peninsula_data": [ - "peninsulaU.nc", - "peninsulaV.nc", - "peninsulaP.nc", - "peninsulaT.nc", - ], - "GlobCurrent_example_data": [ - f"{date.strftime('%Y%m%d')}000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc" +_POOCH_REGISTRY_FILES: list[str] = ( + [ + "MovingEddies_data/moving_eddiesP.nc", + "MovingEddies_data/moving_eddiesU.nc", + "MovingEddies_data/moving_eddiesV.nc", + ] + + ["MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"] + + ["OFAM_example_data/OFAM_simple_U.nc", "OFAM_example_data/OFAM_simple_V.nc"] + + [ + "Peninsula_data/peninsulaU.nc", + "Peninsula_data/peninsulaV.nc", + "Peninsula_data/peninsulaP.nc", + "Peninsula_data/peninsulaT.nc", + ] + + [ + 
f"GlobCurrent_example_data/{date.strftime('%Y%m%d')}000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc" for date in ([datetime(2002, 1, 1) + timedelta(days=x) for x in range(0, 365)] + [datetime(2003, 1, 1)]) - ], - "CopernicusMarine_data_for_Argo_tutorial": [ - "cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", - "cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", - "cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", - ], - "DecayingMovingEddy_data": [ - "decaying_moving_eddyU.nc", - "decaying_moving_eddyV.nc", - ], - "FESOM_periodic_channel": [ - "fesom_channel.nc", - "u.fesom_channel.nc", - "v.fesom_channel.nc", - "w.fesom_channel.nc", - ], - "NemoCurvilinear_data": [ - "U_purely_zonal-ORCA025_grid_U.nc4", - "V_purely_zonal-ORCA025_grid_V.nc4", - "mesh_mask.nc4", - ], - "NemoNorthSeaORCA025-N006_data": [ - "ORCA025-N06_20000104d05U.nc", - "ORCA025-N06_20000109d05U.nc", - "ORCA025-N06_20000114d05U.nc", - "ORCA025-N06_20000119d05U.nc", - "ORCA025-N06_20000124d05U.nc", - "ORCA025-N06_20000129d05U.nc", - "ORCA025-N06_20000104d05V.nc", - "ORCA025-N06_20000109d05V.nc", - "ORCA025-N06_20000114d05V.nc", - "ORCA025-N06_20000119d05V.nc", - "ORCA025-N06_20000124d05V.nc", - "ORCA025-N06_20000129d05V.nc", - "ORCA025-N06_20000104d05W.nc", - "ORCA025-N06_20000109d05W.nc", - "ORCA025-N06_20000114d05W.nc", - "ORCA025-N06_20000119d05W.nc", - "ORCA025-N06_20000124d05W.nc", - "ORCA025-N06_20000129d05W.nc", - "coordinates.nc", - ], - "POPSouthernOcean_data": [ - "t.x1_SAMOC_flux.169000.nc", - "t.x1_SAMOC_flux.169001.nc", - "t.x1_SAMOC_flux.169002.nc", - "t.x1_SAMOC_flux.169003.nc", - "t.x1_SAMOC_flux.169004.nc", - "t.x1_SAMOC_flux.169005.nc", - ], - "SWASH_data": [ - "field_0065532.nc", - "field_0065537.nc", - "field_0065542.nc", - "field_0065548.nc", - "field_0065552.nc", - 
"field_0065557.nc", - ], - "WOA_data": [f"woa18_decav_t{m:02d}_04.nc" for m in range(1, 13)], - "CROCOidealized_data": ["CROCO_idealized.nc"], -} - - -def _create_pooch_registry() -> dict[str, None]: - """Collapses the mapping of dataset names to filenames into a pooch registry. - - Hashes are set to None for all files. - """ - registry: dict[str, None] = {} - for dataset, filenames in _EXAMPLE_DATA_FILES.items(): - for filename in filenames: - registry[f"{dataset}/{filename}"] = None - return registry + ] + + [ + "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", + "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", + "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", + ] + + [ + "DecayingMovingEddy_data/decaying_moving_eddyU.nc", + "DecayingMovingEddy_data/decaying_moving_eddyV.nc", + ] + + [ + "FESOM_periodic_channel/fesom_channel.nc", + "FESOM_periodic_channel/u.fesom_channel.nc", + "FESOM_periodic_channel/v.fesom_channel.nc", + "FESOM_periodic_channel/w.fesom_channel.nc", + ] + + [ + "NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4", + "NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4", + "NemoCurvilinear_data/mesh_mask.nc4", + ] + + [ + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05U.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05U.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05U.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05U.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05U.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05U.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05V.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05V.nc", + 
"NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05V.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05V.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05V.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05V.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05W.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05W.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05W.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05W.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05W.nc", + "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05W.nc", + "NemoNorthSeaORCA025-N006_data/coordinates.nc", + ] + + [ + "POPSouthernOcean_data/t.x1_SAMOC_flux.169000.nc", + "POPSouthernOcean_data/t.x1_SAMOC_flux.169001.nc", + "POPSouthernOcean_data/t.x1_SAMOC_flux.169002.nc", + "POPSouthernOcean_data/t.x1_SAMOC_flux.169003.nc", + "POPSouthernOcean_data/t.x1_SAMOC_flux.169004.nc", + "POPSouthernOcean_data/t.x1_SAMOC_flux.169005.nc", + ] + + [ + "SWASH_data/field_0065532.nc", + "SWASH_data/field_0065537.nc", + "SWASH_data/field_0065542.nc", + "SWASH_data/field_0065548.nc", + "SWASH_data/field_0065552.nc", + "SWASH_data/field_0065557.nc", + ] + + [f"WOA_data/woa18_decav_t{m:02d}_04.nc" for m in range(1, 13)] + + ["CROCOidealized_data/CROCO_idealized.nc"] +) + +_POOCH_REGISTRY = {k: None for k in _POOCH_REGISTRY_FILES} -_POOCH_REGISTRY = _create_pooch_registry() _ODIE = pooch.create( path=_DATA_HOME, base_url=_DATA_URL, @@ -151,7 +140,7 @@ def __init__(self, path_relative_to_root: str, pre_decode_cf_callable=None): def open_dataset(self) -> xr.Dataset: self.download_relevant_files() with xr.set_options(use_new_combine_kwarg_defaults=True): - ds = xr.open_mfdataset(Path(self.pup.path) / self.path_relative_to_root, decode_cf=False) + ds = xr.open_mfdataset(f"{self.pup.path}/{self.path_relative_to_root}", decode_cf=False) if self.pre_decode_cf_callable is not None: ds = self.pre_decode_cf_callable(ds) @@ -193,8 
+182,7 @@ def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset: return ds -# The first here is a human readable key, the latter the path to load the netcdf data -# (after refactor the latter open path will disappear, and will just be `open_zarr(f'{ds_key}.zip')`) +# The first here is a human readable key used to open datasets, with an object to open the datasets # fmt: off _DATASET_KEYS_AND_CONFIGS: dict[str, _V3Dataset] = dict([ ("MovingEddies_data/P", _V3Dataset("MovingEddies_data/moving_eddiesP.nc")), @@ -207,7 +195,7 @@ def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset: ("Peninsula_data/V", _V3Dataset("Peninsula_data/peninsulaV.nc")), ("Peninsula_data/P", _V3Dataset("Peninsula_data/peninsulaP.nc")), ("Peninsula_data/T", _V3Dataset("Peninsula_data/peninsulaT.nc")), - ("GlobCurrent_example_data/data", _V3Dataset("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc")), + ("GlobCurrent_example_data/data", _V3Dataset("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat)), ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), @@ -232,77 +220,26 @@ def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset: # fmt: on -def list_example_datasets(v4=False) 
-> list[str]: # TODO: Remove v4 flag when migrating to open_dataset +def list_datasets() -> list[str]: # TODO: Remove v4 flag when migrating to open_dataset """List the available example datasets. - Use :func:`download_example_dataset` to download one of the datasets. + Use :func:`open_dataset` to download and open one of the datasets. Returns ------- datasets : list of str The names of the available example datasets. """ - if v4: - return list(_DATASET_KEYS_AND_CONFIGS.keys()) - return list(set(v.path_relative_to_root.split("/")[0] for v in _DATASET_KEYS_AND_CONFIGS.values())) - - -def download_example_dataset(dataset: str): - """Load an example dataset from the parcels website. - - This function provides quick access to a small number of example datasets - that are useful in documentation and testing in parcels. - - The location where the data is downloaded can be set using the environment variable PARCELS_EXAMPLE_DATA . - - Parameters - ---------- - dataset : str - Name of the dataset to load. - - Returns - ------- - dataset_folder : Path - Path to the folder containing the downloaded dataset files. - """ - # Dev note: `dataset` is assumed to be a folder name with netcdf files - if dataset not in _EXAMPLE_DATA_FILES: - raise ValueError( - f"Dataset {dataset!r} not found. Available datasets are: " + ", ".join(_EXAMPLE_DATA_FILES.keys()) - ) - - cache_folder = Path(_ODIE.path) - dataset_folder = cache_folder / dataset - - for file_name in _ODIE.registry: - if file_name.startswith(dataset): - should_patch = dataset == "GlobCurrent_example_data" - _ODIE.fetch(file_name, processor=_v4_compat_patch if should_patch else None) - - return dataset_folder + return list(_DATASET_KEYS_AND_CONFIGS.keys()) def open_dataset(name: str): try: dataset_config = _DATASET_KEYS_AND_CONFIGS[name] except KeyError as e: - raise ValueError( - f"Dataset {name!r} not found. 
Available datasets are: " + ", ".join(list_example_datasets(v4=True)) - ) from e + raise ValueError(f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_datasets())) from e assert not name.endswith((".zarr", ".zip", ".nc")), ( "Dataset name should not have suffix" ) # TODO: Move to test_tutorial return dataset_config.open_dataset() - - -def _v4_compat_patch(fname, action, pup): - """ - Patch the GlobCurrent example dataset to be compatible with v4. - - See https://www.fatiando.org/pooch/latest/processors.html#creating-your-own-processors - """ - if action == "fetch": - return fname - xr.load_dataset(fname).pipe(patch_dataset_v4_compat).to_netcdf(fname) - return fname diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py index a958949514..847cbb3ea4 100644 --- a/tests/test_tutorial.py +++ b/tests/test_tutorial.py @@ -2,49 +2,29 @@ import requests import xarray as xr -from parcels.tutorial import ( - _ODIE, - download_example_dataset, - list_example_datasets, - open_dataset, -) +import parcels.tutorial -@pytest.fixture(scope="function") +@pytest.fixture(scope="function", autouse=True) def tmp_path_parcels_example_data(monkeypatch, tmp_path): monkeypatch.setenv("PARCELS_EXAMPLE_DATA", str(tmp_path)) return tmp_path -@pytest.mark.parametrize("url", [_ODIE.get_url(filename) for filename in _ODIE.registry.keys()]) +@pytest.mark.parametrize( + "url", [parcels.tutorial._ODIE.get_url(filename) for filename in parcels.tutorial._ODIE.registry.keys()] +) def test_pooch_registry_url_reponse(url): response = requests.head(url) assert not (400 <= response.status_code < 600) -@pytest.mark.parametrize("dataset", list_example_datasets()[:1]) -def test_download_example_dataset_folder_creation(dataset): - dataset_folder_path = download_example_dataset(dataset) - - assert dataset_folder_path.exists() - assert dataset_folder_path.name == dataset - - -def test_download_non_existing_example_dataset(tmp_path_parcels_example_data): - with 
pytest.raises(ValueError): - download_example_dataset("non_existing_dataset") - - -def test_download_example_dataset_no_data_home(): - # This test depends on your default data_home location and whether - # it's okay to download files there. Be careful with this test in a CI environment. - dataset = list_example_datasets()[0] - dataset_folder_path = download_example_dataset(dataset) - assert dataset_folder_path.exists() - assert dataset_folder_path.name == dataset +def test_open_dataset_non_existing(): + with pytest.raises(ValueError, match="Dataset.*not found"): + parcels.tutorial.open_dataset("non_existing_dataset") -@pytest.mark.parametrize("name", list_example_datasets(v4=True)) +@pytest.mark.parametrize("name", parcels.tutorial.list_datasets()) def test_open_dataset(name): - ds = open_dataset(name) + ds = parcels.tutorial.open_dataset(name) assert isinstance(ds, xr.Dataset) From b36cc8e4bfb18dc72c7bd4985585463991fef043 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 11:56:00 +0200 Subject: [PATCH 21/42] Update pooch registry --- src/parcels/tutorial.py | 127 +++++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 68 deletions(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index 0a127f5e80..3e5c0be0b8 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -15,102 +15,93 @@ # make a new release in the repo and update the DATA_REPO_TAG to the new tag _DATA_REPO_TAG = "main" -_DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{_DATA_REPO_TAG}/data" +_DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{_DATA_REPO_TAG}" _DATA_HOME = os.environ.get("PARCELS_EXAMPLE_DATA") if _DATA_HOME is None: _DATA_HOME = pooch.os_cache("parcels") - -# Keys are the dataset names. Values are the filenames in the dataset folder. Note that -# you can specify subfolders in the dataset folder putting slashes in the filename list. 
-# e.g., -# "my_dataset": ["file0.nc", "folder1/file1.nc", "folder2/file2.nc"] -# my_dataset/ -# ├── file0.nc -# ├── folder1/ -# │ └── file1.nc -# └── folder2/ -# └── file2.nc -# # See instructions at https://github.com/Parcels-code/parcels-data for adding new datasets _POOCH_REGISTRY_FILES: list[str] = ( + # These datasets are from v3 and before of Parcels, where we just used netcdf files [ - "MovingEddies_data/moving_eddiesP.nc", - "MovingEddies_data/moving_eddiesU.nc", - "MovingEddies_data/moving_eddiesV.nc", + "data/MovingEddies_data/moving_eddiesP.nc", + "data/MovingEddies_data/moving_eddiesU.nc", + "data/MovingEddies_data/moving_eddiesV.nc", ] - + ["MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"] - + ["OFAM_example_data/OFAM_simple_U.nc", "OFAM_example_data/OFAM_simple_V.nc"] + + ["data/MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"] + + ["data/OFAM_example_data/OFAM_simple_U.nc", "OFAM_example_data/OFAM_simple_V.nc"] + [ - "Peninsula_data/peninsulaU.nc", - "Peninsula_data/peninsulaV.nc", - "Peninsula_data/peninsulaP.nc", - "Peninsula_data/peninsulaT.nc", + "data/Peninsula_data/peninsulaU.nc", + "data/Peninsula_data/peninsulaV.nc", + "data/Peninsula_data/peninsulaP.nc", + "data/Peninsula_data/peninsulaT.nc", ] + [ - f"GlobCurrent_example_data/{date.strftime('%Y%m%d')}000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc" + f"data/GlobCurrent_example_data/{date.strftime('%Y%m%d')}000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc" for date in ([datetime(2002, 1, 1) + timedelta(days=x) for x in range(0, 365)] + [datetime(2003, 1, 1)]) ] + [ - "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", - "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", - 
"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", + "data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", + "data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", + "data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", ] + [ - "DecayingMovingEddy_data/decaying_moving_eddyU.nc", - "DecayingMovingEddy_data/decaying_moving_eddyV.nc", + "data/DecayingMovingEddy_data/decaying_moving_eddyU.nc", + "data/DecayingMovingEddy_data/decaying_moving_eddyV.nc", ] + [ - "FESOM_periodic_channel/fesom_channel.nc", - "FESOM_periodic_channel/u.fesom_channel.nc", - "FESOM_periodic_channel/v.fesom_channel.nc", - "FESOM_periodic_channel/w.fesom_channel.nc", + "data/FESOM_periodic_channel/fesom_channel.nc", + "data/FESOM_periodic_channel/u.fesom_channel.nc", + "data/FESOM_periodic_channel/v.fesom_channel.nc", + "data/FESOM_periodic_channel/w.fesom_channel.nc", ] + [ - "NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4", - "NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4", - "NemoCurvilinear_data/mesh_mask.nc4", + "data/NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4", + "data/NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4", + "data/NemoCurvilinear_data/mesh_mask.nc4", ] + [ - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05U.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05U.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05U.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05U.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05U.nc", - 
"NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05U.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05V.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05V.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05V.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05V.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05V.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05V.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05W.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05W.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05W.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05W.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05W.nc", - "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05W.nc", - "NemoNorthSeaORCA025-N006_data/coordinates.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05U.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05U.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05U.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05U.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05U.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05U.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05V.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05V.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05V.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05V.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05V.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05V.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05W.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05W.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05W.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05W.nc", + 
"data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05W.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05W.nc", + "data/NemoNorthSeaORCA025-N006_data/coordinates.nc", ] + [ - "POPSouthernOcean_data/t.x1_SAMOC_flux.169000.nc", - "POPSouthernOcean_data/t.x1_SAMOC_flux.169001.nc", - "POPSouthernOcean_data/t.x1_SAMOC_flux.169002.nc", - "POPSouthernOcean_data/t.x1_SAMOC_flux.169003.nc", - "POPSouthernOcean_data/t.x1_SAMOC_flux.169004.nc", - "POPSouthernOcean_data/t.x1_SAMOC_flux.169005.nc", + "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169000.nc", + "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169001.nc", + "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169002.nc", + "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169003.nc", + "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169004.nc", + "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169005.nc", ] + [ - "SWASH_data/field_0065532.nc", - "SWASH_data/field_0065537.nc", - "SWASH_data/field_0065542.nc", - "SWASH_data/field_0065548.nc", - "SWASH_data/field_0065552.nc", - "SWASH_data/field_0065557.nc", + "data/SWASH_data/field_0065532.nc", + "data/SWASH_data/field_0065537.nc", + "data/SWASH_data/field_0065542.nc", + "data/SWASH_data/field_0065548.nc", + "data/SWASH_data/field_0065552.nc", + "data/SWASH_data/field_0065557.nc", ] - + [f"WOA_data/woa18_decav_t{m:02d}_04.nc" for m in range(1, 13)] - + ["CROCOidealized_data/CROCO_idealized.nc"] + + [f"data/WOA_data/woa18_decav_t{m:02d}_04.nc" for m in range(1, 13)] + + ["data/CROCOidealized_data/CROCO_idealized.nc"] + # These datasets are from v4 of Parcels where we're opting for Zipped zarr datasets + # ... 
) _POOCH_REGISTRY = {k: None for k in _POOCH_REGISTRY_FILES} From 0fd9dbd87ce77f44ed75fc926a5a313de213eab7 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 11:57:45 +0200 Subject: [PATCH 22/42] Move test --- src/parcels/tutorial.py | 3 --- tests/test_tutorial.py | 5 +++++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index 3e5c0be0b8..d52534531d 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -229,8 +229,5 @@ def open_dataset(name: str): dataset_config = _DATASET_KEYS_AND_CONFIGS[name] except KeyError as e: raise ValueError(f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_datasets())) from e - assert not name.endswith((".zarr", ".zip", ".nc")), ( - "Dataset name should not have suffix" - ) # TODO: Move to test_tutorial return dataset_config.open_dataset() diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py index 847cbb3ea4..d25ab0cad4 100644 --- a/tests/test_tutorial.py +++ b/tests/test_tutorial.py @@ -28,3 +28,8 @@ def test_open_dataset_non_existing(): def test_open_dataset(name): ds = parcels.tutorial.open_dataset(name) assert isinstance(ds, xr.Dataset) + + +@pytest.mark.parametrize("name", parcels.tutorial.list_datasets()) +def test_dataset_keys(name): + assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" From d408232103dae5e93796b748f3768d30f5414a90 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 12:02:53 +0200 Subject: [PATCH 23/42] Add purpose to datasets --- src/parcels/tutorial.py | 69 ++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index d52534531d..cca64b28ed 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -1,4 +1,5 @@ import abc +import enum import os 
from collections.abc import Callable from datetime import datetime, timedelta @@ -175,38 +176,42 @@ def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset: # The first here is a human readable key used to open datasets, with an object to open the datasets # fmt: off -_DATASET_KEYS_AND_CONFIGS: dict[str, _V3Dataset] = dict([ - ("MovingEddies_data/P", _V3Dataset("MovingEddies_data/moving_eddiesP.nc")), - ("MovingEddies_data/U", _V3Dataset("MovingEddies_data/moving_eddiesU.nc")), - ("MovingEddies_data/V", _V3Dataset("MovingEddies_data/moving_eddiesV.nc")), - ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", _V3Dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc")), - ("OFAM_example_data/U", _V3Dataset("OFAM_example_data/OFAM_simple_U.nc")), - ("OFAM_example_data/V", _V3Dataset("OFAM_example_data/OFAM_simple_V.nc")), - ("Peninsula_data/U", _V3Dataset("Peninsula_data/peninsulaU.nc")), - ("Peninsula_data/V", _V3Dataset("Peninsula_data/peninsulaV.nc")), - ("Peninsula_data/P", _V3Dataset("Peninsula_data/peninsulaP.nc")), - ("Peninsula_data/T", _V3Dataset("Peninsula_data/peninsulaT.nc")), - ("GlobCurrent_example_data/data", _V3Dataset("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat)), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", 
_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")), - ("DecayingMovingEddy_data/U", _V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyU.nc")), - ("DecayingMovingEddy_data/V", _V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyV.nc")), - ("FESOM_periodic_channel/fesom_channel", _V3Dataset("FESOM_periodic_channel/fesom_channel.nc")), - ("FESOM_periodic_channel/u.fesom_channel", _V3Dataset("FESOM_periodic_channel/u.fesom_channel.nc")), - ("FESOM_periodic_channel/v.fesom_channel", _V3Dataset("FESOM_periodic_channel/v.fesom_channel.nc")), - ("FESOM_periodic_channel/w.fesom_channel", _V3Dataset("FESOM_periodic_channel/w.fesom_channel.nc")), - ("NemoCurvilinear_data_zonal/U", _V3Dataset("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4")), - ("NemoCurvilinear_data_zonal/V", _V3Dataset("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4")), - ("NemoCurvilinear_data_zonal/mesh_mask", _V3Dataset("NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2)), - ("NemoNorthSeaORCA025-N006_data/U", _V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc")), - ("NemoNorthSeaORCA025-N006_data/V", _V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc")), - ("NemoNorthSeaORCA025-N006_data/W", _V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc")), - ("NemoNorthSeaORCA025-N006_data/mesh_mask", _V3Dataset("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1)), +class _Purpose(enum.Enum): + TESTING = enum.auto() + TUTORIAL = enum.auto() + +_DATASET_KEYS_AND_CONFIGS: dict[str, tuple[_V3Dataset, _Purpose]] = dict([ + ("MovingEddies_data/P", (_V3Dataset("MovingEddies_data/moving_eddiesP.nc"), _Purpose.TUTORIAL)), + ("MovingEddies_data/U", (_V3Dataset("MovingEddies_data/moving_eddiesU.nc"), _Purpose.TUTORIAL)), + ("MovingEddies_data/V", 
(_V3Dataset("MovingEddies_data/moving_eddiesV.nc"), _Purpose.TUTORIAL)), + ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", (_V3Dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"), _Purpose.TUTORIAL)), + ("OFAM_example_data/U", (_V3Dataset("OFAM_example_data/OFAM_simple_U.nc"), _Purpose.TUTORIAL)), + ("OFAM_example_data/V", (_V3Dataset("OFAM_example_data/OFAM_simple_V.nc"), _Purpose.TUTORIAL)), + ("Peninsula_data/U", (_V3Dataset("Peninsula_data/peninsulaU.nc"), _Purpose.TUTORIAL)), + ("Peninsula_data/V", (_V3Dataset("Peninsula_data/peninsulaV.nc"), _Purpose.TUTORIAL)), + ("Peninsula_data/P", (_V3Dataset("Peninsula_data/peninsulaP.nc"), _Purpose.TUTORIAL)), + ("Peninsula_data/T", (_V3Dataset("Peninsula_data/peninsulaT.nc"), _Purpose.TUTORIAL)), + ("GlobCurrent_example_data/data", (_V3Dataset("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat), _Purpose.TUTORIAL)), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", (_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", (_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", (_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)), + ("DecayingMovingEddy_data/U", (_V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyU.nc"), _Purpose.TUTORIAL)), + ("DecayingMovingEddy_data/V", (_V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyV.nc"), 
_Purpose.TUTORIAL)), + ("FESOM_periodic_channel/fesom_channel", (_V3Dataset("FESOM_periodic_channel/fesom_channel.nc"), _Purpose.TUTORIAL)), + ("FESOM_periodic_channel/u.fesom_channel", (_V3Dataset("FESOM_periodic_channel/u.fesom_channel.nc"), _Purpose.TUTORIAL)), + ("FESOM_periodic_channel/v.fesom_channel", (_V3Dataset("FESOM_periodic_channel/v.fesom_channel.nc"), _Purpose.TUTORIAL)), + ("FESOM_periodic_channel/w.fesom_channel", (_V3Dataset("FESOM_periodic_channel/w.fesom_channel.nc"), _Purpose.TUTORIAL)), + ("NemoCurvilinear_data_zonal/U", (_V3Dataset("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4"), _Purpose.TUTORIAL)), + ("NemoCurvilinear_data_zonal/V", (_V3Dataset("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4"), _Purpose.TUTORIAL)), + ("NemoCurvilinear_data_zonal/mesh_mask", (_V3Dataset("NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2), _Purpose.TUTORIAL)), + ("NemoNorthSeaORCA025-N006_data/U", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc"), _Purpose.TUTORIAL)), + ("NemoNorthSeaORCA025-N006_data/V", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"), _Purpose.TUTORIAL)), + ("NemoNorthSeaORCA025-N006_data/W", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"), _Purpose.TUTORIAL)), + ("NemoNorthSeaORCA025-N006_data/mesh_mask", (_V3Dataset("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1), _Purpose.TUTORIAL)), # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973 - ("SWASH_data/data", _V3Dataset("SWASH_data/field_00655*.nc")), - ("WOA_data/data", _V3Dataset("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day)), - ("CROCOidealized_data/data", _V3Dataset("CROCOidealized_data/CROCO_idealized.nc")), + ("SWASH_data/data", (_V3Dataset("SWASH_data/field_00655*.nc"), _Purpose.TUTORIAL)), + ("WOA_data/data", 
(_V3Dataset("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day), _Purpose.TUTORIAL)), + ("CROCOidealized_data/data", (_V3Dataset("CROCOidealized_data/CROCO_idealized.nc"), _Purpose.TUTORIAL)), ]) # fmt: on @@ -226,7 +231,7 @@ def list_datasets() -> list[str]: # TODO: Remove v4 flag when migrating to open def open_dataset(name: str): try: - dataset_config = _DATASET_KEYS_AND_CONFIGS[name] + dataset_config = _DATASET_KEYS_AND_CONFIGS[name][0] except KeyError as e: raise ValueError(f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_datasets())) from e From adff0aa7e64252081d47c4679d1f459a39aa95fe Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 12:03:56 +0200 Subject: [PATCH 24/42] Move file --- src/parcels/{tutorial.py => _datasets/remote.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/parcels/{tutorial.py => _datasets/remote.py} (100%) diff --git a/src/parcels/tutorial.py b/src/parcels/_datasets/remote.py similarity index 100% rename from src/parcels/tutorial.py rename to src/parcels/_datasets/remote.py From c6adb753bb89349d26a341d2675886cb32848555 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 12:16:08 +0200 Subject: [PATCH 25/42] Move test file --- .../{test_tutorial.py => datasets/test_remote.py} | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) rename tests/{test_tutorial.py => datasets/test_remote.py} (62%) diff --git a/tests/test_tutorial.py b/tests/datasets/test_remote.py similarity index 62% rename from tests/test_tutorial.py rename to tests/datasets/test_remote.py index d25ab0cad4..be2ebd89c4 100644 --- a/tests/test_tutorial.py +++ b/tests/datasets/test_remote.py @@ -2,7 +2,7 @@ import requests import xarray as xr -import parcels.tutorial +import parcels._datasets.remote as remote @pytest.fixture(scope="function", autouse=True) @@ -11,9 +11,7 @@ def 
tmp_path_parcels_example_data(monkeypatch, tmp_path): return tmp_path -@pytest.mark.parametrize( - "url", [parcels.tutorial._ODIE.get_url(filename) for filename in parcels.tutorial._ODIE.registry.keys()] -) +@pytest.mark.parametrize("url", [remote._ODIE.get_url(filename) for filename in remote._ODIE.registry.keys()]) def test_pooch_registry_url_reponse(url): response = requests.head(url) assert not (400 <= response.status_code < 600) @@ -21,15 +19,15 @@ def test_pooch_registry_url_reponse(url): def test_open_dataset_non_existing(): with pytest.raises(ValueError, match="Dataset.*not found"): - parcels.tutorial.open_dataset("non_existing_dataset") + remote.open_dataset("non_existing_dataset") -@pytest.mark.parametrize("name", parcels.tutorial.list_datasets()) +@pytest.mark.parametrize("name", remote.list_datasets()) def test_open_dataset(name): - ds = parcels.tutorial.open_dataset(name) + ds = remote.open_dataset(name) assert isinstance(ds, xr.Dataset) -@pytest.mark.parametrize("name", parcels.tutorial.list_datasets()) +@pytest.mark.parametrize("name", remote.list_datasets()) def test_dataset_keys(name): assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" From 3b751590a9b944c5d55a944303e13321010fa2ff Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 12:21:54 +0200 Subject: [PATCH 26/42] Update list_datasets and open_dataset --- src/parcels/_datasets/remote.py | 55 +++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py index cca64b28ed..331a0f9c25 100644 --- a/src/parcels/_datasets/remote.py +++ b/src/parcels/_datasets/remote.py @@ -4,6 +4,7 @@ from collections.abc import Callable from datetime import datetime, timedelta from pathlib import Path +from typing import Literal import pooch import xarray as xr @@ -174,12 +175,15 @@ def 
_preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset: return ds -# The first here is a human readable key used to open datasets, with an object to open the datasets -# fmt: off class _Purpose(enum.Enum): - TESTING = enum.auto() - TUTORIAL = enum.auto() + TESTING = "testing" + TUTORIAL = "tutorial" + + +_TPurpose = Literal["testing", "tutorial"] +# The first here is a human readable key used to open datasets, with an object to open the datasets +# fmt: off _DATASET_KEYS_AND_CONFIGS: dict[str, tuple[_V3Dataset, _Purpose]] = dict([ ("MovingEddies_data/P", (_V3Dataset("MovingEddies_data/moving_eddiesP.nc"), _Purpose.TUTORIAL)), ("MovingEddies_data/U", (_V3Dataset("MovingEddies_data/moving_eddiesU.nc"), _Purpose.TUTORIAL)), @@ -216,23 +220,54 @@ class _Purpose(enum.Enum): # fmt: on -def list_datasets() -> list[str]: # TODO: Remove v4 flag when migrating to open_dataset - """List the available example datasets. +def list_datasets(purpose: _TPurpose | Literal["any"] = "any") -> list[str]: + """List the available remote datasets. Use :func:`open_dataset` to download and open one of the datasets. + Parameters + ---------- + purpose : {'any', 'testing', 'tutorial'}, optional + Filter datasets by purpose. Use ``'any'`` (default) to return all + datasets, ``'tutorial'`` for tutorial datasets, or ``'testing'`` for + datasets used in tests. + Returns ------- datasets : list of str - The names of the available example datasets. + The names of the available datasets matching the given purpose. """ - return list(_DATASET_KEYS_AND_CONFIGS.keys()) + if purpose == "any": + return list(_DATASET_KEYS_AND_CONFIGS.keys()) + purpose_enum = _Purpose(purpose) + return [k for (k, (_, p)) in _DATASET_KEYS_AND_CONFIGS.items() if p == purpose_enum] -def open_dataset(name: str): + +def open_dataset(name: str, purpose: _TPurpose | Literal["any"] = "any"): + """Download and open a remote dataset as an :class:`xarray.Dataset`. 
+ + Use :func:`list_datasets` to see the available dataset names. + + Parameters + ---------- + name : str + Name of the dataset to open. Must be one of the keys returned by + :func:`list_datasets`. + purpose : {'any', 'testing', 'tutorial'}, optional + Purpose filter used to populate the error message when ``name`` is not + found. Defaults to ``'any'``. + + Returns + ------- + xarray.Dataset + The requested dataset. + """ try: dataset_config = _DATASET_KEYS_AND_CONFIGS[name][0] except KeyError as e: - raise ValueError(f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_datasets())) from e + raise ValueError( + f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_datasets(purpose=purpose)) + ) from e return dataset_config.open_dataset() From 89f8202c9f88ee8d1374a883a63be38c1190d8d9 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 12:22:02 +0200 Subject: [PATCH 27/42] Fix registry entry --- src/parcels/_datasets/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py index 331a0f9c25..ac038ef83e 100644 --- a/src/parcels/_datasets/remote.py +++ b/src/parcels/_datasets/remote.py @@ -32,7 +32,7 @@ "data/MovingEddies_data/moving_eddiesV.nc", ] + ["data/MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"] - + ["data/OFAM_example_data/OFAM_simple_U.nc", "OFAM_example_data/OFAM_simple_V.nc"] + + ["data/OFAM_example_data/OFAM_simple_U.nc", "data/OFAM_example_data/OFAM_simple_V.nc"] + [ "data/Peninsula_data/peninsulaU.nc", "data/Peninsula_data/peninsulaV.nc", From ec32d2a9764bea6af70d352bedfc0f7f5cea748f Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 12:24:51 +0200 Subject: [PATCH 28/42] Add thin parcels.tutorial wrapper --- src/parcels/tutorial.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 
insertions(+) create mode 100644 src/parcels/tutorial.py diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py new file mode 100644 index 0000000000..2ac1780b91 --- /dev/null +++ b/src/parcels/tutorial.py @@ -0,0 +1,36 @@ +from parcels._datasets.remote import list_datasets as _remote_list_datasets +from parcels._datasets.remote import open_dataset as _remote_open_dataset + +__all__ = ["list_datasets", "open_dataset"] + + +def list_datasets() -> list[str]: + """List the available tutorial datasets. + + Use :func:`open_dataset` to download and open one of the datasets. + + Returns + ------- + datasets : list of str + The names of the available datasets matching the given purpose. + """ + return _remote_list_datasets(purpose="tutorial") + + +def open_dataset(name: str): + """Download and open a tutorial dataset as an :class:`xarray.Dataset`. + + Use :func:`list_datasets` to see the available dataset names. + + Parameters + ---------- + name : str + Name of the dataset to open. Must be one of the keys returned by + :func:`list_datasets`. + + Returns + ------- + xarray.Dataset + The requested dataset. 
+ """ + return _remote_open_dataset(name, purpose="tutorial") From 04ce890d1f0444c46c759c245b2c86f89a15d2fc Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 12:26:28 +0200 Subject: [PATCH 29/42] Update comment --- src/parcels/_datasets/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py index ac038ef83e..7618c2c053 100644 --- a/src/parcels/_datasets/remote.py +++ b/src/parcels/_datasets/remote.py @@ -212,7 +212,7 @@ class _Purpose(enum.Enum): ("NemoNorthSeaORCA025-N006_data/V", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"), _Purpose.TUTORIAL)), ("NemoNorthSeaORCA025-N006_data/W", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"), _Purpose.TUTORIAL)), ("NemoNorthSeaORCA025-N006_data/mesh_mask", (_V3Dataset("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1), _Purpose.TUTORIAL)), - # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973 + # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should not be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973 ("SWASH_data/data", (_V3Dataset("SWASH_data/field_00655*.nc"), _Purpose.TUTORIAL)), ("WOA_data/data", (_V3Dataset("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day), _Purpose.TUTORIAL)), ("CROCOidealized_data/data", (_V3Dataset("CROCOidealized_data/CROCO_idealized.nc"), _Purpose.TUTORIAL)), From 7c107d8d08a006133a66e67ebbbeb33d00b3d47e Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 12:31:15 +0200 Subject: [PATCH 30/42] Add test_list_datasets --- tests/datasets/test_remote.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git 
a/tests/datasets/test_remote.py b/tests/datasets/test_remote.py index be2ebd89c4..3aa739c42f 100644 --- a/tests/datasets/test_remote.py +++ b/tests/datasets/test_remote.py @@ -31,3 +31,12 @@ def test_open_dataset(name): @pytest.mark.parametrize("name", remote.list_datasets()) def test_dataset_keys(name): assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" + + +def test_list_datasets(): + tutorial_datasets = set(remote.list_datasets("tutorial")) + testing_datasets = set(remote.list_datasets("testing")) + all_datasets = set(remote.list_datasets("any")) + assert tutorial_datasets.issubset(all_datasets) + assert testing_datasets.issubset(all_datasets) + assert tutorial_datasets | testing_datasets == all_datasets From 1cc975027d14df24800986646b13695129146a49 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 13:25:31 +0200 Subject: [PATCH 31/42] Fix relative paths --- src/parcels/_datasets/remote.py | 60 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py index 7618c2c053..a96bb7aeaf 100644 --- a/src/parcels/_datasets/remote.py +++ b/src/parcels/_datasets/remote.py @@ -185,37 +185,37 @@ class _Purpose(enum.Enum): # The first here is a human readable key used to open datasets, with an object to open the datasets # fmt: off _DATASET_KEYS_AND_CONFIGS: dict[str, tuple[_V3Dataset, _Purpose]] = dict([ - ("MovingEddies_data/P", (_V3Dataset("MovingEddies_data/moving_eddiesP.nc"), _Purpose.TUTORIAL)), - ("MovingEddies_data/U", (_V3Dataset("MovingEddies_data/moving_eddiesU.nc"), _Purpose.TUTORIAL)), - ("MovingEddies_data/V", (_V3Dataset("MovingEddies_data/moving_eddiesV.nc"), _Purpose.TUTORIAL)), - ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", (_V3Dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"), _Purpose.TUTORIAL)), - 
("OFAM_example_data/U", (_V3Dataset("OFAM_example_data/OFAM_simple_U.nc"), _Purpose.TUTORIAL)), - ("OFAM_example_data/V", (_V3Dataset("OFAM_example_data/OFAM_simple_V.nc"), _Purpose.TUTORIAL)), - ("Peninsula_data/U", (_V3Dataset("Peninsula_data/peninsulaU.nc"), _Purpose.TUTORIAL)), - ("Peninsula_data/V", (_V3Dataset("Peninsula_data/peninsulaV.nc"), _Purpose.TUTORIAL)), - ("Peninsula_data/P", (_V3Dataset("Peninsula_data/peninsulaP.nc"), _Purpose.TUTORIAL)), - ("Peninsula_data/T", (_V3Dataset("Peninsula_data/peninsulaT.nc"), _Purpose.TUTORIAL)), - ("GlobCurrent_example_data/data", (_V3Dataset("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat), _Purpose.TUTORIAL)), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", (_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", (_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", (_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)), - ("DecayingMovingEddy_data/U", (_V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyU.nc"), _Purpose.TUTORIAL)), - ("DecayingMovingEddy_data/V", (_V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyV.nc"), _Purpose.TUTORIAL)), - ("FESOM_periodic_channel/fesom_channel", (_V3Dataset("FESOM_periodic_channel/fesom_channel.nc"), _Purpose.TUTORIAL)), - ("FESOM_periodic_channel/u.fesom_channel", 
(_V3Dataset("FESOM_periodic_channel/u.fesom_channel.nc"), _Purpose.TUTORIAL)), - ("FESOM_periodic_channel/v.fesom_channel", (_V3Dataset("FESOM_periodic_channel/v.fesom_channel.nc"), _Purpose.TUTORIAL)), - ("FESOM_periodic_channel/w.fesom_channel", (_V3Dataset("FESOM_periodic_channel/w.fesom_channel.nc"), _Purpose.TUTORIAL)), - ("NemoCurvilinear_data_zonal/U", (_V3Dataset("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4"), _Purpose.TUTORIAL)), - ("NemoCurvilinear_data_zonal/V", (_V3Dataset("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4"), _Purpose.TUTORIAL)), - ("NemoCurvilinear_data_zonal/mesh_mask", (_V3Dataset("NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2), _Purpose.TUTORIAL)), - ("NemoNorthSeaORCA025-N006_data/U", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc"), _Purpose.TUTORIAL)), - ("NemoNorthSeaORCA025-N006_data/V", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"), _Purpose.TUTORIAL)), - ("NemoNorthSeaORCA025-N006_data/W", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"), _Purpose.TUTORIAL)), - ("NemoNorthSeaORCA025-N006_data/mesh_mask", (_V3Dataset("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1), _Purpose.TUTORIAL)), + ("MovingEddies_data/P", (_V3Dataset("data/MovingEddies_data/moving_eddiesP.nc"), _Purpose.TUTORIAL)), + ("MovingEddies_data/U", (_V3Dataset("data/MovingEddies_data/moving_eddiesU.nc"), _Purpose.TUTORIAL)), + ("MovingEddies_data/V", (_V3Dataset("data/MovingEddies_data/moving_eddiesV.nc"), _Purpose.TUTORIAL)), + ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", (_V3Dataset("data/MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"), _Purpose.TUTORIAL)), + ("OFAM_example_data/U", (_V3Dataset("data/OFAM_example_data/OFAM_simple_U.nc"), _Purpose.TUTORIAL)), + ("OFAM_example_data/V", (_V3Dataset("data/OFAM_example_data/OFAM_simple_V.nc"), _Purpose.TUTORIAL)), + ("Peninsula_data/U", 
(_V3Dataset("data/Peninsula_data/peninsulaU.nc"), _Purpose.TUTORIAL)), + ("Peninsula_data/V", (_V3Dataset("data/Peninsula_data/peninsulaV.nc"), _Purpose.TUTORIAL)), + ("Peninsula_data/P", (_V3Dataset("data/Peninsula_data/peninsulaP.nc"), _Purpose.TUTORIAL)), + ("Peninsula_data/T", (_V3Dataset("data/Peninsula_data/peninsulaT.nc"), _Purpose.TUTORIAL)), + ("GlobCurrent_example_data/data", (_V3Dataset("data/GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat), _Purpose.TUTORIAL)), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)), + ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)), + ("DecayingMovingEddy_data/U", (_V3Dataset("data/DecayingMovingEddy_data/decaying_moving_eddyU.nc"), _Purpose.TUTORIAL)), + ("DecayingMovingEddy_data/V", (_V3Dataset("data/DecayingMovingEddy_data/decaying_moving_eddyV.nc"), _Purpose.TUTORIAL)), + ("FESOM_periodic_channel/fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/fesom_channel.nc"), _Purpose.TUTORIAL)), + ("FESOM_periodic_channel/u.fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/u.fesom_channel.nc"), _Purpose.TUTORIAL)), + ("FESOM_periodic_channel/v.fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/v.fesom_channel.nc"), 
_Purpose.TUTORIAL)), + ("FESOM_periodic_channel/w.fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/w.fesom_channel.nc"), _Purpose.TUTORIAL)), + ("NemoCurvilinear_data_zonal/U", (_V3Dataset("data/NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4"), _Purpose.TUTORIAL)), + ("NemoCurvilinear_data_zonal/V", (_V3Dataset("data/NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4"), _Purpose.TUTORIAL)), + ("NemoCurvilinear_data_zonal/mesh_mask", (_V3Dataset("data/NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2), _Purpose.TUTORIAL)), + ("NemoNorthSeaORCA025-N006_data/U", (_V3Dataset("data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc"), _Purpose.TUTORIAL)), + ("NemoNorthSeaORCA025-N006_data/V", (_V3Dataset("data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"), _Purpose.TUTORIAL)), + ("NemoNorthSeaORCA025-N006_data/W", (_V3Dataset("data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"), _Purpose.TUTORIAL)), + ("NemoNorthSeaORCA025-N006_data/mesh_mask", (_V3Dataset("data/NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1), _Purpose.TUTORIAL)), # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should not be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973 - ("SWASH_data/data", (_V3Dataset("SWASH_data/field_00655*.nc"), _Purpose.TUTORIAL)), - ("WOA_data/data", (_V3Dataset("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day), _Purpose.TUTORIAL)), - ("CROCOidealized_data/data", (_V3Dataset("CROCOidealized_data/CROCO_idealized.nc"), _Purpose.TUTORIAL)), + ("SWASH_data/data", (_V3Dataset("data/SWASH_data/field_00655*.nc"), _Purpose.TUTORIAL)), + ("WOA_data/data", (_V3Dataset("data/WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day), _Purpose.TUTORIAL)), + ("CROCOidealized_data/data", (_V3Dataset("data/CROCOidealized_data/CROCO_idealized.nc"), _Purpose.TUTORIAL)), ]) # fmt: on From 
38404a38de84bccf8a77df7ce640025f826dd5bd Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 13:45:29 +0200 Subject: [PATCH 32/42] Update references in codebase --- docs/getting_started/tutorial_output.ipynb | 16 +++++++--- docs/getting_started/tutorial_quickstart.md | 10 +++--- .../examples/explanation_kernelloop.md | 10 +++--- .../examples/tutorial_Argofloats.ipynb | 16 +++++++--- .../examples/tutorial_croco_3D.ipynb | 12 +------ .../examples/tutorial_delaystart.ipynb | 16 +++++++--- .../examples/tutorial_diffusion.ipynb | 18 +++++++---- .../examples/tutorial_dt_integrators.ipynb | 16 +++++++--- .../examples/tutorial_gsw_density.ipynb | 16 +++++++--- .../tutorial_manipulating_field_data.ipynb | 16 +++++++--- .../user_guide/examples/tutorial_mitgcm.ipynb | 5 +-- docs/user_guide/examples/tutorial_nemo.ipynb | 32 +++++++++---------- .../examples/tutorial_sampling.ipynb | 16 +++++++--- src/parcels/__init__.py | 2 -- tests/test_uxarray_fieldset.py | 16 +++++++--- 15 files changed, 136 insertions(+), 81 deletions(-) diff --git a/docs/getting_started/tutorial_output.ipynb b/docs/getting_started/tutorial_output.ipynb index 3b9eedec56..6baf5a9438 100644 --- a/docs/getting_started/tutorial_output.ipynb +++ b/docs/getting_started/tutorial_output.ipynb @@ -52,11 +52,19 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = xr.merge(\n", + " [\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", + 
" ),\n", + " ]\n", ")\n", - "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", "ds_fields.load() # load the dataset into memory\n", "\n", "# Convert to SGRID-compliant dataset and create FieldSet\n", diff --git a/docs/getting_started/tutorial_quickstart.md b/docs/getting_started/tutorial_quickstart.md index c806311e2c..4c0bf02ac9 100644 --- a/docs/getting_started/tutorial_quickstart.md +++ b/docs/getting_started/tutorial_quickstart.md @@ -30,11 +30,11 @@ hydrodynamics fields in which the particles are tracked. Here we provide an exam [Global Ocean Physics Reanalysis](https://doi.org/10.48670/moi-00021) from the Copernicus Marine Service. ```{code-cell} -example_dataset_folder = parcels.tutorial.download_example_dataset( - "CopernicusMarine_data_for_Argo_tutorial" -) - -ds_fields = xr.open_mfdataset(f"{example_dataset_folder}/*.nc", combine="by_coords") +ds_fields = xr.merge([ + parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc"), + parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc"), + parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc"), +]) ds_fields.load() # load the dataset into memory ds_fields ``` diff --git a/docs/user_guide/examples/explanation_kernelloop.md b/docs/user_guide/examples/explanation_kernelloop.md index 582be17ed7..994cbd6228 100644 --- a/docs/user_guide/examples/explanation_kernelloop.md +++ b/docs/user_guide/examples/explanation_kernelloop.md @@ -56,11 +56,11 @@ import parcels import parcels.tutorial # Load the CopernicusMarine data in the Agulhas region from the example_datasets -example_dataset_folder = parcels.tutorial.download_example_dataset( - "CopernicusMarine_data_for_Argo_tutorial" -) - -ds_fields = xr.open_mfdataset(f"{example_dataset_folder}/*.nc", combine="by_coords") +ds_fields = xr.merge([ + 
parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc"), + parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc"), + parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc"), +]) ds_fields.load() # load the dataset into memory # Create an idealised wind field and add it to the dataset diff --git a/docs/user_guide/examples/tutorial_Argofloats.ipynb b/docs/user_guide/examples/tutorial_Argofloats.ipynb index d0b487a24b..232c3c8b60 100644 --- a/docs/user_guide/examples/tutorial_Argofloats.ipynb +++ b/docs/user_guide/examples/tutorial_Argofloats.ipynb @@ -113,12 +113,20 @@ "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = xr.merge(\n", + " [\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", + " ),\n", + " ]\n", ")\n", "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", - "\n", "# TODO check how we can get good performance without loading full dataset in memory\n", "ds_fields.load() # load the dataset into memory\n", "\n", diff --git a/docs/user_guide/examples/tutorial_croco_3D.ipynb b/docs/user_guide/examples/tutorial_croco_3D.ipynb index 515a5cde08..7ea1424398 100644 --- a/docs/user_guide/examples/tutorial_croco_3D.ipynb +++ b/docs/user_guide/examples/tutorial_croco_3D.ipynb @@ -34,17 +34,7 @@ "metadata": {}, "outputs": [], "source": [ - "import 
matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import xarray as xr\n", - "\n", - "import parcels\n", - "import parcels.tutorial\n", - "\n", - "data_folder = parcels.tutorial.download_example_dataset(\"CROCOidealized_data\")\n", - "ds_fields = xr.open_dataset(data_folder / \"CROCO_idealized.nc\")\n", - "\n", - "ds_fields.load(); # Preload data to speed up access" + "import matplotlib.pyplot as plt\nimport numpy as np\nimport xarray as xr\n\nimport parcels\nimport parcels.tutorial\n\nds_fields = parcels.tutorial.open_dataset(\"CROCOidealized_data/data\")\n\nds_fields.load(); # Preload data to speed up access" ] }, { diff --git a/docs/user_guide/examples/tutorial_delaystart.ipynb b/docs/user_guide/examples/tutorial_delaystart.ipynb index 5ca66f4cd9..57d4eb6296 100644 --- a/docs/user_guide/examples/tutorial_delaystart.ipynb +++ b/docs/user_guide/examples/tutorial_delaystart.ipynb @@ -51,11 +51,19 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = xr.merge(\n", + " [\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", + " ),\n", + " ]\n", ")\n", - "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", "ds_fields.load() # load the dataset into memory\n", "\n", "# Convert to SGRID-compliant dataset and create FieldSet\n", diff --git a/docs/user_guide/examples/tutorial_diffusion.ipynb b/docs/user_guide/examples/tutorial_diffusion.ipynb index 992dcdfa55..ef2fba7611 100644 --- 
a/docs/user_guide/examples/tutorial_diffusion.ipynb +++ b/docs/user_guide/examples/tutorial_diffusion.ipynb @@ -467,12 +467,18 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", - ")\n", - "\n", - "ds_fields = xr.open_mfdataset(\n", - " f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\"\n", + "ds_fields = xr.merge(\n", + " [\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", + " ),\n", + " ]\n", ").isel(depth=slice(0, 1))\n", "ds_fields.load() # load the dataset into memory" ] diff --git a/docs/user_guide/examples/tutorial_dt_integrators.ipynb b/docs/user_guide/examples/tutorial_dt_integrators.ipynb index 3ac6be4e13..6dd0ed9563 100644 --- a/docs/user_guide/examples/tutorial_dt_integrators.ipynb +++ b/docs/user_guide/examples/tutorial_dt_integrators.ipynb @@ -62,11 +62,19 @@ "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = xr.merge(\n", + " [\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", + " ),\n", + " 
]\n", ")\n", - "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", "ds_fields.load() # load the dataset into memory\n", "\n", "# Convert to SGRID-compliant dataset and create FieldSet\n", diff --git a/docs/user_guide/examples/tutorial_gsw_density.ipynb b/docs/user_guide/examples/tutorial_gsw_density.ipynb index 22bbba4ed3..5528051e32 100644 --- a/docs/user_guide/examples/tutorial_gsw_density.ipynb +++ b/docs/user_guide/examples/tutorial_gsw_density.ipynb @@ -39,12 +39,20 @@ "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.tutorial.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = xr.merge(\n", + " [\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", + " ),\n", + " ]\n", ")\n", "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", - "\n", "# TODO check how we can get good performance without loading full dataset in memory\n", "ds_fields.load() # load the dataset into memory\n", "\n", diff --git a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb index dc17a0d6d0..cf085ade90 100644 --- a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb +++ b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb @@ -46,11 +46,19 @@ "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = 
parcels.tutorial.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = xr.merge(\n", + " [\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", + " ),\n", + " ]\n", ")\n", - "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", "ds_fields.load() # load the dataset into memory\n", "\n", "# Create an idealised wind field and add it to the dataset\n", diff --git a/docs/user_guide/examples/tutorial_mitgcm.ipynb b/docs/user_guide/examples/tutorial_mitgcm.ipynb index 845d15e361..ae46c7fce2 100644 --- a/docs/user_guide/examples/tutorial_mitgcm.ipynb +++ b/docs/user_guide/examples/tutorial_mitgcm.ipynb @@ -30,8 +30,9 @@ "import parcels\n", "import parcels.tutorial\n", "\n", - "data_folder = parcels.tutorial.download_example_dataset(\"MITgcm_example_data\")\n", - "ds_fields = xr.open_dataset(data_folder / \"mitgcm_UV_surface_zonally_reentrant.nc\")" + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant\"\n", + ")" ] }, { diff --git a/docs/user_guide/examples/tutorial_nemo.ipynb b/docs/user_guide/examples/tutorial_nemo.ipynb index 5f27d2af65..0824bd54e0 100644 --- a/docs/user_guide/examples/tutorial_nemo.ipynb +++ b/docs/user_guide/examples/tutorial_nemo.ipynb @@ -66,15 +66,13 @@ "metadata": {}, "outputs": [], "source": [ - "data_folder = parcels.tutorial.download_example_dataset(\"NemoCurvilinear_data\")\n", - "ds_fields = xr.open_mfdataset(\n", - " data_folder.glob(\"*.nc4\"),\n", - " data_vars=\"minimal\",\n", - " coords=\"minimal\",\n", - " compat=\"override\",\n", - ")\n", - "\n", - "ds_coords = 
xr.open_dataset(data_folder / \"mesh_mask.nc4\", decode_times=False)\n", + "ds_fields = xr.merge(\n", + " [\n", + " parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/U\"),\n", + " parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/V\"),\n", + " ]\n", + ") # TODO: Fix grid staggering (using SGRID metadata)\n", + "ds_coords = parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/mesh_mask\")\n", "ds_fset = parcels.convert.nemo_to_sgrid(\n", " fields=dict(U=ds_fields[\"U\"], V=ds_fields[\"V\"]), coords=ds_coords\n", ")\n", @@ -291,14 +289,14 @@ "metadata": {}, "outputs": [], "source": [ - "data_folder = parcels.tutorial.download_example_dataset(\"NemoNorthSeaORCA025-N006_data\")\n", - "ds_fields = xr.open_mfdataset(\n", - " data_folder.glob(\"ORCA*.nc\"),\n", - " data_vars=\"minimal\",\n", - " coords=\"minimal\",\n", - " compat=\"override\",\n", - ")\n", - "ds_coords = xr.open_dataset(data_folder / \"coordinates.nc\", decode_times=False)\n", + "ds_fields = xr.merge(\n", + " [\n", + " parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/U\"),\n", + " parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/V\"),\n", + " parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/W\"),\n", + " ]\n", + ") # TODO: Fix grid staggering (using SGRID metadata)\n", + "ds_coords = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/mesh_mask\")\n", "ds_fset = parcels.convert.nemo_to_sgrid(\n", " fields={\"U\": ds_fields[\"uo\"], \"V\": ds_fields[\"vo\"], \"W\": ds_fields[\"wo\"]},\n", " coords=ds_coords,\n", diff --git a/docs/user_guide/examples/tutorial_sampling.ipynb b/docs/user_guide/examples/tutorial_sampling.ipynb index 1867a5b1ca..8cdb02fc11 100644 --- a/docs/user_guide/examples/tutorial_sampling.ipynb +++ b/docs/user_guide/examples/tutorial_sampling.ipynb @@ -59,11 +59,19 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = 
parcels.tutorial.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = xr.merge(\n", + " [\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", + " ),\n", + " parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", + " ),\n", + " ]\n", ")\n", - "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", "ds_fields.load() # load the dataset into memory\n", "\n", "# Convert to SGRID-compliant dataset and create FieldSet\n", diff --git a/src/parcels/__init__.py b/src/parcels/__init__.py index 5330b6212a..2a7854cded 100644 --- a/src/parcels/__init__.py +++ b/src/parcels/__init__.py @@ -67,8 +67,6 @@ "ParticleSetWarning", # Utilities "logger", - "download_example_dataset", - "list_example_datasets", ] _stdlib_warnings.warn( diff --git a/tests/test_uxarray_fieldset.py b/tests/test_uxarray_fieldset.py index d4a11f550b..8393424ee9 100644 --- a/tests/test_uxarray_fieldset.py +++ b/tests/test_uxarray_fieldset.py @@ -1,7 +1,10 @@ +from pathlib import Path + import numpy as np import pytest import uxarray as ux +import parcels._datasets.remote as _parcels_remote import parcels.tutorial from parcels import ( Field, @@ -22,12 +25,15 @@ @pytest.fixture def ds_fesom_channel() -> ux.UxDataset: - fesom_path = parcels.tutorial.download_example_dataset("FESOM_periodic_channel") - grid_path = f"{fesom_path}/fesom_channel.nc" + # Download FESOM files via the new tutorial API + parcels.tutorial.open_dataset("FESOM_periodic_channel/fesom_channel") + # uxarray requires file paths; access the downloaded files from the pooch cache + _fesom_dir = Path(_parcels_remote._DATA_HOME) / "data" / "FESOM_periodic_channel" + grid_path = str(_fesom_dir / 
"fesom_channel.nc") data_path = [ - f"{fesom_path}/u.fesom_channel.nc", - f"{fesom_path}/v.fesom_channel.nc", - f"{fesom_path}/w.fesom_channel.nc", + str(_fesom_dir / "u.fesom_channel.nc"), + str(_fesom_dir / "v.fesom_channel.nc"), + str(_fesom_dir / "w.fesom_channel.nc"), ] ds = ux.open_mfdataset(grid_path, data_path).rename_vars({"u": "U", "v": "V", "w": "W"}) ds = fesom_to_ugrid(ds) From d6e298d5eb28f734e8c068cc4228c6dc56999b52 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:21:07 +0200 Subject: [PATCH 33/42] Update function names --- src/parcels/_datasets/remote.py | 8 +++----- src/parcels/tutorial.py | 8 ++++---- tests/datasets/test_remote.py | 14 +++++++------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py index a96bb7aeaf..2dcc1ee59f 100644 --- a/src/parcels/_datasets/remote.py +++ b/src/parcels/_datasets/remote.py @@ -11,8 +11,6 @@ from parcels._v3to4 import patch_dataset_v4_compat -__all__ = ["list_datasets", "open_dataset"] - # When modifying existing datasets in a backwards incompatible way, # make a new release in the repo and update the DATA_REPO_TAG to the new tag _DATA_REPO_TAG = "main" @@ -220,7 +218,7 @@ class _Purpose(enum.Enum): # fmt: on -def list_datasets(purpose: _TPurpose | Literal["any"] = "any") -> list[str]: +def list_remote_datasets(purpose: _TPurpose | Literal["any"] = "any") -> list[str]: """List the available remote datasets. Use :func:`open_dataset` to download and open one of the datasets. 
@@ -244,7 +242,7 @@ def list_datasets(purpose: _TPurpose | Literal["any"] = "any") -> list[str]: return [k for (k, (_, p)) in _DATASET_KEYS_AND_CONFIGS.items() if p == purpose_enum] -def open_dataset(name: str, purpose: _TPurpose | Literal["any"] = "any"): +def open_remote_dataset(name: str, purpose: _TPurpose | Literal["any"] = "any"): """Download and open a remote dataset as an :class:`xarray.Dataset`. Use :func:`list_datasets` to see the available dataset names. @@ -267,7 +265,7 @@ def open_dataset(name: str, purpose: _TPurpose | Literal["any"] = "any"): dataset_config = _DATASET_KEYS_AND_CONFIGS[name][0] except KeyError as e: raise ValueError( - f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_datasets(purpose=purpose)) + f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_remote_datasets(purpose=purpose)) ) from e return dataset_config.open_dataset() diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py index 2ac1780b91..49a4a8b6bf 100644 --- a/src/parcels/tutorial.py +++ b/src/parcels/tutorial.py @@ -1,5 +1,5 @@ -from parcels._datasets.remote import list_datasets as _remote_list_datasets -from parcels._datasets.remote import open_dataset as _remote_open_dataset +from parcels._datasets.remote import list_remote_datasets as _list_remote_datasets +from parcels._datasets.remote import open_remote_dataset as _open_remote_dataset __all__ = ["list_datasets", "open_dataset"] @@ -14,7 +14,7 @@ def list_datasets() -> list[str]: datasets : list of str The names of the available datasets matching the given purpose. """ - return _remote_list_datasets(purpose="tutorial") + return _list_remote_datasets(purpose="tutorial") def open_dataset(name: str): @@ -33,4 +33,4 @@ def open_dataset(name: str): xarray.Dataset The requested dataset. 
""" - return _remote_open_dataset(name, purpose="tutorial") + return _open_remote_dataset(name, purpose="tutorial") diff --git a/tests/datasets/test_remote.py b/tests/datasets/test_remote.py index 3aa739c42f..856752d018 100644 --- a/tests/datasets/test_remote.py +++ b/tests/datasets/test_remote.py @@ -19,24 +19,24 @@ def test_pooch_registry_url_reponse(url): def test_open_dataset_non_existing(): with pytest.raises(ValueError, match="Dataset.*not found"): - remote.open_dataset("non_existing_dataset") + remote.open_remote_dataset("non_existing_dataset") -@pytest.mark.parametrize("name", remote.list_datasets()) +@pytest.mark.parametrize("name", remote.list_remote_datasets()) def test_open_dataset(name): - ds = remote.open_dataset(name) + ds = remote.open_remote_dataset(name) assert isinstance(ds, xr.Dataset) -@pytest.mark.parametrize("name", remote.list_datasets()) +@pytest.mark.parametrize("name", remote.list_remote_datasets()) def test_dataset_keys(name): assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" def test_list_datasets(): - tutorial_datasets = set(remote.list_datasets("tutorial")) - testing_datasets = set(remote.list_datasets("testing")) - all_datasets = set(remote.list_datasets("any")) + tutorial_datasets = set(remote.list_remote_datasets("tutorial")) + testing_datasets = set(remote.list_remote_datasets("testing")) + all_datasets = set(remote.list_remote_datasets("any")) assert tutorial_datasets.issubset(all_datasets) assert testing_datasets.issubset(all_datasets) assert tutorial_datasets | testing_datasets == all_datasets From e21b024bdabaa8eb67cb33963255a8fbf8f51c83 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:48:20 +0200 Subject: [PATCH 34/42] Update options to open_mfdataset --- src/parcels/_datasets/remote.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/parcels/_datasets/remote.py 
b/src/parcels/_datasets/remote.py index 2dcc1ee59f..72ddf8a103 100644 --- a/src/parcels/_datasets/remote.py +++ b/src/parcels/_datasets/remote.py @@ -131,7 +131,15 @@ def __init__(self, path_relative_to_root: str, pre_decode_cf_callable=None): def open_dataset(self) -> xr.Dataset: self.download_relevant_files() with xr.set_options(use_new_combine_kwarg_defaults=True): - ds = xr.open_mfdataset(f"{self.pup.path}/{self.path_relative_to_root}", decode_cf=False) + ds = xr.open_mfdataset( + f"{self.pup.path}/{self.path_relative_to_root}", + decode_cf=False, + # options to open mfdataset https://github.com/Parcels-code/Parcels/pull/2574#discussion_r3073256988 + combine="nested", + data_vars="minimal", + coords="minimal", + compat="override", + ) if self.pre_decode_cf_callable is not None: ds = self.pre_decode_cf_callable(ds) From dc652da3ee12716c43989bb0cc7d5a1aac51d8f4 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:55:08 +0200 Subject: [PATCH 35/42] Fix imports in notebooks --- docs/getting_started/tutorial_output.ipynb | 7 ++++--- docs/user_guide/examples/tutorial_diffusion.ipynb | 3 ++- docs/user_guide/examples/tutorial_nemo.ipynb | 3 ++- docs/user_guide/examples/tutorial_sampling.ipynb | 3 ++- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/getting_started/tutorial_output.ipynb b/docs/getting_started/tutorial_output.ipynb index 6baf5a9438..4831a22791 100644 --- a/docs/getting_started/tutorial_output.ipynb +++ b/docs/getting_started/tutorial_output.ipynb @@ -35,7 +35,8 @@ "import numpy as np\n", "import xarray as xr\n", "\n", - "import parcels" + "import parcels\n", + "import parcels.tutorial" ] }, { @@ -566,7 +567,7 @@ "metadata": { "celltoolbar": "Metagegevens bewerken", "kernelspec": { - "display_name": "test-notebooks", + "display_name": "default", "language": "python", "name": "python3" }, @@ -580,7 +581,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython3", - "version": "3.14.2" + "version": "3.14.3" } }, "nbformat": 4, diff --git a/docs/user_guide/examples/tutorial_diffusion.ipynb b/docs/user_guide/examples/tutorial_diffusion.ipynb index ef2fba7611..43f6636d52 100644 --- a/docs/user_guide/examples/tutorial_diffusion.ipynb +++ b/docs/user_guide/examples/tutorial_diffusion.ipynb @@ -119,7 +119,8 @@ "import trajan as ta\n", "import xarray as xr\n", "\n", - "import parcels" + "import parcels\n", + "import parcels.tutorial" ] }, { diff --git a/docs/user_guide/examples/tutorial_nemo.ipynb b/docs/user_guide/examples/tutorial_nemo.ipynb index 0824bd54e0..9d31953d14 100644 --- a/docs/user_guide/examples/tutorial_nemo.ipynb +++ b/docs/user_guide/examples/tutorial_nemo.ipynb @@ -50,7 +50,8 @@ "import numpy as np\n", "import xarray as xr\n", "\n", - "import parcels" + "import parcels\n", + "import parcels.tutorial" ] }, { diff --git a/docs/user_guide/examples/tutorial_sampling.ipynb b/docs/user_guide/examples/tutorial_sampling.ipynb index 8cdb02fc11..b2f1d9db58 100644 --- a/docs/user_guide/examples/tutorial_sampling.ipynb +++ b/docs/user_guide/examples/tutorial_sampling.ipynb @@ -41,7 +41,8 @@ "# To open and look at the temperature data\n", "import xarray as xr\n", "\n", - "import parcels" + "import parcels\n", + "import parcels.tutorial" ] }, { From 472930d8398f9c87c0febbee25f04ddaa5f1c1ca Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:57:25 +0200 Subject: [PATCH 36/42] Remove combine="nested" Use combine coords instead --- src/parcels/_datasets/remote.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py index 72ddf8a103..3b9d9c7185 100644 --- a/src/parcels/_datasets/remote.py +++ b/src/parcels/_datasets/remote.py @@ -135,7 +135,6 @@ def open_dataset(self) -> xr.Dataset: f"{self.pup.path}/{self.path_relative_to_root}", decode_cf=False, # options to open mfdataset 
https://github.com/Parcels-code/Parcels/pull/2574#discussion_r3073256988 - combine="nested", data_vars="minimal", coords="minimal", compat="override", From 9f7d78f0a772855ac129620e4217daa55a32902b Mon Sep 17 00:00:00 2001 From: Nick Hodgskin <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 16:21:26 +0200 Subject: [PATCH 37/42] Update src/parcels/_datasets/remote.py Co-authored-by: Erik van Sebille --- src/parcels/_datasets/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py index 3b9d9c7185..8df039f58e 100644 --- a/src/parcels/_datasets/remote.py +++ b/src/parcels/_datasets/remote.py @@ -164,7 +164,7 @@ def open_dataset(self) -> xr.Dataset: def _preprocess_drop_time_from_mesh1(ds: xr.Dataset) -> xr.Dataset: - # For some reason on the mesh "NemoNorthSeaORCA025-N006_data/coordinates.nc" there are time dimensions. These dimension also has broken cf-time metadata + # For some reason on the mesh "NemoNorthSeaORCA025-N006_data/coordinates.nc" there are two time dimensions (of length 1). 
These dimensions also have broken cf-time metadata # this fixes that return ds.isel(time=0).drop(["time", "time_steps"]) From feb67c1238f5213bfefe6f56b11152335d30a929 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 16:33:39 +0200 Subject: [PATCH 38/42] Update copernicusmarine example dataset --- docs/getting_started/tutorial_output.ipynb | 14 ++------------ docs/getting_started/tutorial_quickstart.md | 7 ++----- docs/user_guide/examples/explanation_kernelloop.md | 6 +----- docs/user_guide/examples/tutorial_Argofloats.ipynb | 14 ++------------ docs/user_guide/examples/tutorial_delaystart.ipynb | 14 ++------------ docs/user_guide/examples/tutorial_diffusion.ipynb | 14 ++------------ .../examples/tutorial_dt_integrators.ipynb | 14 ++------------ .../user_guide/examples/tutorial_gsw_density.ipynb | 14 ++------------ .../tutorial_manipulating_field_data.ipynb | 14 ++------------ docs/user_guide/examples/tutorial_sampling.ipynb | 14 ++------------ src/parcels/_datasets/remote.py | 4 +--- tests/test_particlefile.py | 2 +- 12 files changed, 21 insertions(+), 110 deletions(-) diff --git a/docs/getting_started/tutorial_output.ipynb b/docs/getting_started/tutorial_output.ipynb index 4831a22791..c3dbba8521 100644 --- a/docs/getting_started/tutorial_output.ipynb +++ b/docs/getting_started/tutorial_output.ipynb @@ -53,18 +53,8 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "ds_fields = xr.merge(\n", - " [\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", - " ),\n", - " ]\n", + "ds_fields = parcels.tutorial.open_dataset(\n",
+ " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", "ds_fields.load() # load the dataset into memory\n", "\n", diff --git a/docs/getting_started/tutorial_quickstart.md b/docs/getting_started/tutorial_quickstart.md index 4c0bf02ac9..cf17e4bb40 100644 --- a/docs/getting_started/tutorial_quickstart.md +++ b/docs/getting_started/tutorial_quickstart.md @@ -30,11 +30,8 @@ hydrodynamics fields in which the particles are tracked. Here we provide an exam [Global Ocean Physics Reanalysis](https://doi.org/10.48670/moi-00021) from the Copernicus Marine Service. ```{code-cell} -ds_fields = xr.merge([ - parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc"), - parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc"), - parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc"), -]) +ds_fields = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/data") + ds_fields.load() # load the dataset into memory ds_fields ``` diff --git a/docs/user_guide/examples/explanation_kernelloop.md b/docs/user_guide/examples/explanation_kernelloop.md index 994cbd6228..c4a9a58d12 100644 --- a/docs/user_guide/examples/explanation_kernelloop.md +++ b/docs/user_guide/examples/explanation_kernelloop.md @@ -56,11 +56,7 @@ import parcels import parcels.tutorial # Load the CopernicusMarine data in the Agulhas region from the example_datasets -ds_fields = xr.merge([ - parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc"), - parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc"), - parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc"), -]) +ds_fields = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/data") ds_fields.load() # load the dataset into memory # Create an idealised wind field and 
add it to the dataset diff --git a/docs/user_guide/examples/tutorial_Argofloats.ipynb b/docs/user_guide/examples/tutorial_Argofloats.ipynb index 232c3c8b60..a96e7bcb6d 100644 --- a/docs/user_guide/examples/tutorial_Argofloats.ipynb +++ b/docs/user_guide/examples/tutorial_Argofloats.ipynb @@ -113,18 +113,8 @@ "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "ds_fields = xr.merge(\n", - " [\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", - " ),\n", - " ]\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", "\n", "# TODO check how we can get good performance without loading full dataset in memory\n", diff --git a/docs/user_guide/examples/tutorial_delaystart.ipynb b/docs/user_guide/examples/tutorial_delaystart.ipynb index 57d4eb6296..8bb3ffd956 100644 --- a/docs/user_guide/examples/tutorial_delaystart.ipynb +++ b/docs/user_guide/examples/tutorial_delaystart.ipynb @@ -51,18 +51,8 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "ds_fields = xr.merge(\n", - " [\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", - " ),\n", - " ]\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " 
\"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", "ds_fields.load() # load the dataset into memory\n", "\n", diff --git a/docs/user_guide/examples/tutorial_diffusion.ipynb b/docs/user_guide/examples/tutorial_diffusion.ipynb index 43f6636d52..5010f84067 100644 --- a/docs/user_guide/examples/tutorial_diffusion.ipynb +++ b/docs/user_guide/examples/tutorial_diffusion.ipynb @@ -468,18 +468,8 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "ds_fields = xr.merge(\n", - " [\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", - " ),\n", - " ]\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ").isel(depth=slice(0, 1))\n", "ds_fields.load() # load the dataset into memory" ] diff --git a/docs/user_guide/examples/tutorial_dt_integrators.ipynb b/docs/user_guide/examples/tutorial_dt_integrators.ipynb index 6dd0ed9563..bd4d93de54 100644 --- a/docs/user_guide/examples/tutorial_dt_integrators.ipynb +++ b/docs/user_guide/examples/tutorial_dt_integrators.ipynb @@ -62,18 +62,8 @@ "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "ds_fields = xr.merge(\n", - " [\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", - " ),\n", - " 
]\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", "ds_fields.load() # load the dataset into memory\n", "\n", diff --git a/docs/user_guide/examples/tutorial_gsw_density.ipynb b/docs/user_guide/examples/tutorial_gsw_density.ipynb index 5528051e32..fe089bbe13 100644 --- a/docs/user_guide/examples/tutorial_gsw_density.ipynb +++ b/docs/user_guide/examples/tutorial_gsw_density.ipynb @@ -39,18 +39,8 @@ "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "ds_fields = xr.merge(\n", - " [\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", - " ),\n", - " ]\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", "\n", "# TODO check how we can get good performance without loading full dataset in memory\n", diff --git a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb index cf085ade90..4196ba4a5f 100644 --- a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb +++ b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb @@ -46,18 +46,8 @@ "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "ds_fields = xr.merge(\n", - " [\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", - " ),\n", - " 
parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", - " ),\n", - " ]\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", "ds_fields.load() # load the dataset into memory\n", "\n", diff --git a/docs/user_guide/examples/tutorial_sampling.ipynb b/docs/user_guide/examples/tutorial_sampling.ipynb index b2f1d9db58..87855debe5 100644 --- a/docs/user_guide/examples/tutorial_sampling.ipynb +++ b/docs/user_guide/examples/tutorial_sampling.ipynb @@ -60,18 +60,8 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "ds_fields = xr.merge(\n", - " [\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n", - " ),\n", - " parcels.tutorial.open_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n", - " ),\n", - " ]\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", "ds_fields.load() # load the dataset into memory\n", "\n", diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py index 8df039f58e..14c30cd112 100644 --- a/src/parcels/_datasets/remote.py +++ b/src/parcels/_datasets/remote.py @@ -201,9 +201,7 @@ class _Purpose(enum.Enum): ("Peninsula_data/P", (_V3Dataset("data/Peninsula_data/peninsulaP.nc"), _Purpose.TUTORIAL)), ("Peninsula_data/T", (_V3Dataset("data/Peninsula_data/peninsulaT.nc"), _Purpose.TUTORIAL)), ("GlobCurrent_example_data/data", (_V3Dataset("data/GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat), _Purpose.TUTORIAL)), - 
("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)), - ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)), + ("CopernicusMarine_data_for_Argo_tutorial/data", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-*.nc"), _Purpose.TUTORIAL)), ("DecayingMovingEddy_data/U", (_V3Dataset("data/DecayingMovingEddy_data/decaying_moving_eddyU.nc"), _Purpose.TUTORIAL)), ("DecayingMovingEddy_data/V", (_V3Dataset("data/DecayingMovingEddy_data/decaying_moving_eddyV.nc"), _Purpose.TUTORIAL)), ("FESOM_periodic_channel/fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/fesom_channel.nc"), _Purpose.TUTORIAL)), diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index ef4d1b565b..d5ccb4c1a8 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -442,7 +442,7 @@ def test_pset_execute_outputdt_backwards_fieldset_timevarying(): dt = -timedelta(minutes=5) # TODO: Not ideal using the `open_dataset` here, but I'm struggling to recreate this error using the test suite fieldsets we have - ds_in = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc") + ds_in = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/data") fields = {"U": ds_in["uo"], "V": ds_in["vo"]} ds_fset = 
copernicusmarine_to_sgrid(fields=fields) fieldset = FieldSet.from_sgrid_conventions(ds_fset) From 7b05a5414a72920db5776b8310a7d96d2a78abce Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 17:11:34 +0200 Subject: [PATCH 39/42] Bump minimum Xarray dependency and transitive deps option use_new_combine_kwarg_defaults was introduced in 2025.8.0 --- .github/ci/recipe.yaml | 2 +- pixi.toml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ci/recipe.yaml b/.github/ci/recipe.yaml index 7416a211d8..fb77725e40 100644 --- a/.github/ci/recipe.yaml +++ b/.github/ci/recipe.yaml @@ -35,7 +35,7 @@ requirements: - netcdf4 >=1.7.2 - numpy >=2.1.0 - tqdm >=4.50.0 - - xarray >=2024.5.0 + - xarray >=2025.8.0 - cf_xarray >=0.8.6 - xgcm >=0.9.0 - zarr >=2.15.0,!=2.18.0,<3 diff --git a/pixi.toml b/pixi.toml index 4f5b3d5e05..64edb4e7f3 100644 --- a/pixi.toml +++ b/pixi.toml @@ -51,10 +51,10 @@ python = "3.11.*" netcdf4 = "1.6.*" numpy = "2.1.*" tqdm = "4.50.*" -xarray = "2024.5.*" +xarray = "2025.8.*" uxarray = "2025.3.*" -dask = "2024.5.*" -zarr = "2.15.*" +dask = "2024.6.*" +zarr = "2.18.*" xgcm = { version = "0.9.*", channel = "conda-forge" } cf_xarray = "0.8.*" cftime = "1.6.*" From 232b240e1baf2e8f39f52d0bfbf8600dbeb7db6b Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 17:30:41 +0200 Subject: [PATCH 40/42] Fix nemo ingestion --- docs/user_guide/examples/tutorial_nemo.ipynb | 31 ++++++++------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/docs/user_guide/examples/tutorial_nemo.ipynb b/docs/user_guide/examples/tutorial_nemo.ipynb index 9d31953d14..61fdbac563 100644 --- a/docs/user_guide/examples/tutorial_nemo.ipynb +++ b/docs/user_guide/examples/tutorial_nemo.ipynb @@ -67,15 +67,11 @@ "metadata": {}, "outputs": [], "source": [ - "ds_fields = xr.merge(\n", - " [\n", - " 
parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/U\"),\n", - " parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/V\"),\n", - " ]\n", - ") # TODO: Fix grid staggering (using SGRID metadata)\n", + "ds_u = parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/U\")\n", + "ds_v = parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/V\")\n", "ds_coords = parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/mesh_mask\")\n", "ds_fset = parcels.convert.nemo_to_sgrid(\n", - " fields=dict(U=ds_fields[\"U\"], V=ds_fields[\"V\"]), coords=ds_coords\n", + " fields=dict(U=ds_u[\"U\"], V=ds_v[\"V\"]), coords=ds_coords\n", ")\n", "\n", "fieldset = parcels.FieldSet.from_sgrid_conventions(ds_fset)" @@ -290,16 +286,15 @@ "metadata": {}, "outputs": [], "source": [ - "ds_fields = xr.merge(\n", - " [\n", - " parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/U\"),\n", - " parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/V\"),\n", - " parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/W\"),\n", - " ]\n", - ") # TODO: Fix grid staggering (using SGRID metadata)\n", - "ds_coords = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/mesh_mask\")\n", + "ds_u = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/U\")\n", + "ds_v = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/V\")\n", + "ds_w = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/W\")\n", + "ds_coords = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/mesh_mask\")[\n", + " [\"glamf\", \"gphif\"]\n", + "]\n", + "\n", "ds_fset = parcels.convert.nemo_to_sgrid(\n", - " fields={\"U\": ds_fields[\"uo\"], \"V\": ds_fields[\"vo\"], \"W\": ds_fields[\"wo\"]},\n", + " fields={\"U\": ds_u[\"uo\"], \"V\": ds_v[\"vo\"], \"W\": ds_w[\"wo\"]},\n", " coords=ds_coords,\n", ")\n", "fieldset = parcels.FieldSet.from_sgrid_conventions(ds_fset)" @@ -368,7 +363,7 @@ ], "metadata": { "kernelspec": { - "display_name": "docs", 
+ "display_name": "default", "language": "python", "name": "python3" }, @@ -382,7 +377,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.14.2" + "version": "3.14.3" } }, "nbformat": 4, From 1b285797e22b064a4a6da77b694c3665f9348ceb Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 17:49:51 +0200 Subject: [PATCH 41/42] Fix cf_xarray dep in minimum env --- pixi.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pixi.toml b/pixi.toml index 64edb4e7f3..22e141453d 100644 --- a/pixi.toml +++ b/pixi.toml @@ -56,7 +56,7 @@ uxarray = "2025.3.*" dask = "2024.6.*" zarr = "2.18.*" xgcm = { version = "0.9.*", channel = "conda-forge" } -cf_xarray = "0.8.*" +cf_xarray = "0.10.*" cftime = "1.6.*" pooch = "1.8.*" From 7b4006e8f76472a428c4251be668b4720657951c Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Apr 2026 17:53:38 +0200 Subject: [PATCH 42/42] Fix open_remote_dataset It previously didn't respect the purpose since it relied on the KeyError --- src/parcels/_datasets/remote.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py index 14c30cd112..e37c6a7723 100644 --- a/src/parcels/_datasets/remote.py +++ b/src/parcels/_datasets/remote.py @@ -266,11 +266,10 @@ def open_remote_dataset(name: str, purpose: _TPurpose | Literal["any"] = "any"): xarray.Dataset The requested dataset. """ - try: - dataset_config = _DATASET_KEYS_AND_CONFIGS[name][0] - except KeyError as e: + if name not in list_remote_datasets(purpose=purpose): raise ValueError( f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_remote_datasets(purpose=purpose)) - ) from e + ) + dataset_config = _DATASET_KEYS_AND_CONFIGS[name][0] return dataset_config.open_dataset()