From 90356d1c08cda881601abda477c7018100835731 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Thu, 9 Apr 2026 15:35:29 +0200
Subject: [PATCH 01/42] Add tutorial.open_dataset

---
 src/parcels/_tutorial.py | 62 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 60 insertions(+), 2 deletions(-)

diff --git a/src/parcels/_tutorial.py b/src/parcels/_tutorial.py
index 34409312cc..5c0fd5cac5 100644
--- a/src/parcels/_tutorial.py
+++ b/src/parcels/_tutorial.py
@@ -4,6 +4,7 @@
 
 import pooch
 import xarray as xr
+import zarr
 
 from parcels._v3to4 import patch_dataset_v4_compat
 
@@ -106,6 +107,40 @@
     "CROCOidealized_data": ["CROCO_idealized.nc"],
 }
 
+EXAMPLE_DATA_FILES2: list[str] = [
+    "MovingEddies_data/moving_eddiesP.nc",
+    "MovingEddies_data/moving_eddiesU.nc",
+    "MovingEddies_data/moving_eddiesV.nc",
+    "MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc",
+    "OFAM_example_data/OFAM_simple_U.nc",
+    "OFAM_example_data/OFAM_simple_V.nc",
+    "Peninsula_data/peninsulaU.nc",
+    "Peninsula_data/peninsulaV.nc",
+    "Peninsula_data/peninsulaP.nc",
+    "Peninsula_data/peninsulaT.nc",
+    "GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc",
+    "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
+    "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
+    "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
+    "DecayingMovingEddy_data/decaying_moving_eddyU.nc",
+    "DecayingMovingEddy_data/decaying_moving_eddyV.nc",
+    "FESOM_periodic_channel/fesom_channel.nc",
+    "FESOM_periodic_channel/u.fesom_channel.nc",
+    "FESOM_periodic_channel/v.fesom_channel.nc",
+    "FESOM_periodic_channel/w.fesom_channel.nc",
+    "NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4",
+    "NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4",
+    "NemoCurvilinear_data/mesh_mask.nc4",
+    "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc",
+    "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc",
+    "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc",
+    "NemoNorthSeaORCA025-N006_data/coordinates.nc",
+    # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973
+    "SWASH_data/field_00655*.nc",
+    "WOA_data/woa18_decav_t*_04.nc",
+    "CROCOidealized_data/CROCO_idealized.nc",
+]
+
 
 def _create_pooch_registry() -> dict[str, None]:
     """Collapses the mapping of dataset names to filenames into a pooch registry.
@@ -135,7 +170,7 @@ def _get_pooch(data_home=None):
     )
 
 
-def list_example_datasets() -> list[str]:
+def list_example_datasets(full=False) -> list[str]:  # TODO: Remove full flag when migrating to open_dataset
     """List the available example datasets.
 
     Use :func:`download_example_dataset` to download one of the datasets.
@@ -145,7 +180,11 @@ def list_example_datasets() -> list[str]:
     datasets : list of str
         The names of the available example datasets.
     """
-    return list(EXAMPLE_DATA_FILES.keys())
+    if full:
+        return EXAMPLE_DATA_FILES2
+    return list(
+        set(i.split("/")[0] for i in EXAMPLE_DATA_FILES2)
+    )  # TODO: Update implementation to return full dataset item and not just stem, to be in line with `open_dataset`
 
 
 def download_example_dataset(dataset: str, data_home=None):
@@ -186,6 +225,25 @@ def download_example_dataset(dataset: str, data_home=None):
     return dataset_folder
 
 
+# Just creating a temp folder to help during the migration
+TMP_ZARR_FOLDER = Path("../parcels-data-zarr/data")
+
+
+def open_dataset(dataset: str):
+    open_dataset_kwargs = dict(decode_timedelta=False, decode_cf=False)
+    # assert not dataset.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix"
+    dataset_stem, rest = dataset.split("/", maxsplit=1)
+    folder = download_example_dataset(dataset_stem)
+
+    ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs)
+    path = TMP_ZARR_FOLDER / f"{dataset}.zip"
+    path.parent.mkdir(exist_ok=True)
+    if not path.exists():
+        with zarr.storage.ZipStore(path, mode="w") as store:
+            ds.to_zarr(store)
+    return xr.open_zarr(path, **open_dataset_kwargs)
+
+
 def _v4_compat_patch(fname, action, pup):
     """
     Patch the GlobCurrent example dataset to be compatible with v4.

From 1c364befcde9377c9061281f2f8c9c679642dacf Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Thu, 9 Apr 2026 16:13:43 +0200
Subject: [PATCH 02/42] Update dataset names for `open_dataset`

---
 src/parcels/_tutorial.py | 93 +++++++++++++++++++++-------------------
 1 file changed, 50 insertions(+), 43 deletions(-)

diff --git a/src/parcels/_tutorial.py b/src/parcels/_tutorial.py
index 5c0fd5cac5..8a6882d115 100644
--- a/src/parcels/_tutorial.py
+++ b/src/parcels/_tutorial.py
@@ -107,39 +107,43 @@
     "CROCOidealized_data": ["CROCO_idealized.nc"],
 }
 
-EXAMPLE_DATA_FILES2: list[str] = [
-    "MovingEddies_data/moving_eddiesP.nc",
-    "MovingEddies_data/moving_eddiesU.nc",
-    "MovingEddies_data/moving_eddiesV.nc",
-    "MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc",
-    "OFAM_example_data/OFAM_simple_U.nc",
-    "OFAM_example_data/OFAM_simple_V.nc",
-    "Peninsula_data/peninsulaU.nc",
-    "Peninsula_data/peninsulaV.nc",
-    "Peninsula_data/peninsulaP.nc",
-    "Peninsula_data/peninsulaT.nc",
-    "GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc",
-    "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
-    "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
-    "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
-    "DecayingMovingEddy_data/decaying_moving_eddyU.nc",
-    "DecayingMovingEddy_data/decaying_moving_eddyV.nc",
-    "FESOM_periodic_channel/fesom_channel.nc",
-    "FESOM_periodic_channel/u.fesom_channel.nc",
-    "FESOM_periodic_channel/v.fesom_channel.nc",
-    "FESOM_periodic_channel/w.fesom_channel.nc",
-    "NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4",
-    "NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4",
-    "NemoCurvilinear_data/mesh_mask.nc4",
-    "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc",
-    "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc",
-    "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc",
-    "NemoNorthSeaORCA025-N006_data/coordinates.nc",
+# The first here is a human readable key, the latter the path to load the netcdf data
+# (after refactor the latter open path will disappear, and will just be `open_zarr(f'{ds_key}.zip')`)
+# fmt: off
+DATASET_KEYS_AND_OPEN_PATHS: dict[str, str] = dict([
+    ("MovingEddies_data/P", "MovingEddies_data/moving_eddiesP.nc"),
+    ("MovingEddies_data/U", "MovingEddies_data/moving_eddiesU.nc"),
+    ("MovingEddies_data/V", "MovingEddies_data/moving_eddiesV.nc"),
+    ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", "MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"),
+    ("OFAM_example_data/U", "OFAM_example_data/OFAM_simple_U.nc"),
+    ("OFAM_example_data/V", "OFAM_example_data/OFAM_simple_V.nc"),
+    ("Peninsula_data/U", "Peninsula_data/peninsulaU.nc"),
+    ("Peninsula_data/V", "Peninsula_data/peninsulaV.nc"),
+    ("Peninsula_data/P", "Peninsula_data/peninsulaP.nc"),
+    ("Peninsula_data/T", "Peninsula_data/peninsulaT.nc"),
+    ("GlobCurrent_example_data/data.nc", "GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc"),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"),
+    ("DecayingMovingEddy_data/U", "DecayingMovingEddy_data/decaying_moving_eddyU.nc"),
+    ("DecayingMovingEddy_data/V", "DecayingMovingEddy_data/decaying_moving_eddyV.nc"),
+    ("FESOM_periodic_channel/fesom_channel", "FESOM_periodic_channel/fesom_channel.nc"),
+    ("FESOM_periodic_channel/u.fesom_channel", "FESOM_periodic_channel/u.fesom_channel.nc"),
+    ("FESOM_periodic_channel/v.fesom_channel", "FESOM_periodic_channel/v.fesom_channel.nc"),
+    ("FESOM_periodic_channel/w.fesom_channel", "FESOM_periodic_channel/w.fesom_channel.nc"),
+    ("NemoCurvilinear_data_zonal/U", "NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4"),
+    ("NemoCurvilinear_data_zonal/V", "NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4"),
+    ("NemoCurvilinear_data_zonal/mesh_mask", "NemoCurvilinear_data/mesh_mask.nc4"),
+    ("NemoNorthSeaORCA025-N006_data/U", "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc"),
+    ("NemoNorthSeaORCA025-N006_data/V", "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"),
+    ("NemoNorthSeaORCA025-N006_data/W", "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"),
+    ("NemoNorthSeaORCA025-N006_data/mesh_mask", "NemoNorthSeaORCA025-N006_data/coordinates.nc"),
     # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973
-    "SWASH_data/field_00655*.nc",
-    "WOA_data/woa18_decav_t*_04.nc",
-    "CROCOidealized_data/CROCO_idealized.nc",
-]
+    ("SWASH_data/data", "SWASH_data/field_00655*.nc"),
+    ("WOA_data/data", "WOA_data/woa18_decav_t*_04.nc"),
+    ("CROCOidealized_data/data", "CROCOidealized_data/CROCO_idealized.nc"),
+])
+# fmt: on
 
 
 def _create_pooch_registry() -> dict[str, None]:
@@ -170,7 +174,7 @@ def _get_pooch(data_home=None):
     )
 
 
-def list_example_datasets(full=False) -> list[str]:  # TODO: Remove full flag when migrating to open_dataset
+def list_example_datasets(v4=False) -> list[str]:  # TODO: Remove v4 flag when migrating to open_dataset
     """List the available example datasets.
 
     Use :func:`download_example_dataset` to download one of the datasets.
@@ -180,11 +184,9 @@ def list_example_datasets(full=False) -> list[str]:  # TODO: Remove full flag wh
     datasets : list of str
         The names of the available example datasets.
     """
-    if full:
-        return EXAMPLE_DATA_FILES2
-    return list(
-        set(i.split("/")[0] for i in EXAMPLE_DATA_FILES2)
-    )  # TODO: Update implementation to return full dataset item and not just stem, to be in line with `open_dataset`
+    if v4:
+        return list(DATASET_KEYS_AND_OPEN_PATHS.keys())
+    return list(set(v.split("/")[0] for v in DATASET_KEYS_AND_OPEN_PATHS.values()))
 
 
 def download_example_dataset(dataset: str, data_home=None):
@@ -226,17 +228,22 @@ def download_example_dataset(dataset: str, data_home=None):
 
 
 # Just creating a temp folder to help during the migration
-TMP_ZARR_FOLDER = Path("../parcels-data-zarr/data")
+TMP_ZARR_FOLDER = Path("../parcels-data/data-zarr")
 
 
-def open_dataset(dataset: str):
+def open_dataset(name: str):
+    if name not in DATASET_KEYS_AND_OPEN_PATHS:
+        raise ValueError(
+            f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_example_datasets(v4=True))
+        )
+
     open_dataset_kwargs = dict(decode_timedelta=False, decode_cf=False)
     # assert not dataset.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix"
-    dataset_stem, rest = dataset.split("/", maxsplit=1)
-    folder = download_example_dataset(dataset_stem)
+    download_dataset_stem, rest = DATASET_KEYS_AND_OPEN_PATHS[name].split("/", maxsplit=1)
+    folder = download_example_dataset(download_dataset_stem)
 
     ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs)
-    path = TMP_ZARR_FOLDER / f"{dataset}.zip"
+    path = TMP_ZARR_FOLDER / f"{name}.zip"
     path.parent.mkdir(exist_ok=True)
     if not path.exists():
         with zarr.storage.ZipStore(path, mode="w") as store:

From f5cbc2565209904d241d9e607a6e194a365966c8 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Thu, 9 Apr 2026 16:14:06 +0200
Subject: [PATCH 03/42] Port across tutorial util tests

---
 .../tools/test_exampledata_utils.py => tests/test_tutorial.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename tests-v3/tools/test_exampledata_utils.py => tests/test_tutorial.py (96%)

diff --git a/tests-v3/tools/test_exampledata_utils.py b/tests/test_tutorial.py
similarity index 96%
rename from tests-v3/tools/test_exampledata_utils.py
rename to tests/test_tutorial.py
index 94ed9cf833..3169a07c2e 100644
--- a/tests-v3/tools/test_exampledata_utils.py
+++ b/tests/test_tutorial.py
@@ -1,7 +1,7 @@
 import pytest
 import requests
 
-from parcels.tools.exampledata_utils import (
+from parcels._tutorial import (
     _get_pooch,
     download_example_dataset,
     list_example_datasets,

From e5a85c4d30ec85c314c570575a121c3264aedf3c Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Thu, 9 Apr 2026 16:18:01 +0200
Subject: [PATCH 04/42] Add test_open_dataset

---
 tests/test_tutorial.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py
index 3169a07c2e..f603899308 100644
--- a/tests/test_tutorial.py
+++ b/tests/test_tutorial.py
@@ -1,10 +1,12 @@
 import pytest
 import requests
+import xarray as xr
 
 from parcels._tutorial import (
     _get_pooch,
     download_example_dataset,
     list_example_datasets,
+    open_dataset,
 )
 
 
@@ -35,3 +37,9 @@ def test_download_example_dataset_no_data_home():
     dataset_folder_path = download_example_dataset(dataset)
     assert dataset_folder_path.exists()
     assert dataset_folder_path.name == dataset
+
+
+@pytest.mark.parametrize("name", list_example_datasets(v4=True))
+def test_open_dataset(name):
+    ds = open_dataset(name)
+    assert isinstance(ds, xr.Dataset)

From 9ef869a523f0b44fe7c34dcecd5a600d03ffa261 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Thu, 9 Apr 2026 16:24:56 +0200
Subject: [PATCH 05/42] Silence future warning

---
 src/parcels/_tutorial.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/parcels/_tutorial.py b/src/parcels/_tutorial.py
index 8a6882d115..f82d8675e5 100644
--- a/src/parcels/_tutorial.py
+++ b/src/parcels/_tutorial.py
@@ -242,7 +242,8 @@ def open_dataset(name: str):
     download_dataset_stem, rest = DATASET_KEYS_AND_OPEN_PATHS[name].split("/", maxsplit=1)
     folder = download_example_dataset(download_dataset_stem)
 
-    ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs)
+    with xr.set_options(use_new_combine_kwarg_defaults=True):
+        ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs)
     path = TMP_ZARR_FOLDER / f"{name}.zip"
     path.parent.mkdir(exist_ok=True)
     if not path.exists():

From 0f6a8c9e160f3931f45cbcf792e08bdb91d7cb8e Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Thu, 9 Apr 2026 17:01:45 +0200
Subject: [PATCH 06/42] Make variables private

---
 src/parcels/_tutorial.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/parcels/_tutorial.py b/src/parcels/_tutorial.py
index f82d8675e5..56ff0531bd 100644
--- a/src/parcels/_tutorial.py
+++ b/src/parcels/_tutorial.py
@@ -12,9 +12,9 @@
 
 # When modifying existing datasets in a backwards incompatible way,
 # make a new release in the repo and update the DATA_REPO_TAG to the new tag
-DATA_REPO_TAG = "main"
+_DATA_REPO_TAG = "main"
 
-DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{DATA_REPO_TAG}/data"
+_DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{_DATA_REPO_TAG}/data"
 
 # Keys are the dataset names. Values are the filenames in the dataset folder. Note that
 # you can specify subfolders in the dataset folder putting slashes in the filename list.
@@ -28,7 +28,7 @@
 #     └── file2.nc
 #
 # See instructions at https://github.com/Parcels-code/parcels-data for adding new datasets
-EXAMPLE_DATA_FILES: dict[str, list[str]] = {
+_EXAMPLE_DATA_FILES: dict[str, list[str]] = {
     "MovingEddies_data": [
         "moving_eddiesP.nc",
         "moving_eddiesU.nc",
@@ -110,7 +110,7 @@
 # The first here is a human readable key, the latter the path to load the netcdf data
 # (after refactor the latter open path will disappear, and will just be `open_zarr(f'{ds_key}.zip')`)
 # fmt: off
-DATASET_KEYS_AND_OPEN_PATHS: dict[str, str] = dict([
+_DATASET_KEYS_AND_OPEN_PATHS: dict[str, str] = dict([
     ("MovingEddies_data/P", "MovingEddies_data/moving_eddiesP.nc"),
     ("MovingEddies_data/U", "MovingEddies_data/moving_eddiesU.nc"),
     ("MovingEddies_data/V", "MovingEddies_data/moving_eddiesV.nc"),
@@ -152,7 +152,7 @@ def _create_pooch_registry() -> dict[str, None]:
     Hashes are set to None for all files.
     """
     registry: dict[str, None] = {}
-    for dataset, filenames in EXAMPLE_DATA_FILES.items():
+    for dataset, filenames in _EXAMPLE_DATA_FILES.items():
         for filename in filenames:
             registry[f"{dataset}/{filename}"] = None
     return registry
@@ -169,7 +169,7 @@ def _get_pooch(data_home=None):
 
     return pooch.create(
         path=data_home,
-        base_url=DATA_URL,
+        base_url=_DATA_URL,
         registry=POOCH_REGISTRY,
     )
 
@@ -185,8 +185,8 @@ def list_example_datasets(v4=False) -> list[str]:  # TODO: Remove v4 flag when m
         The names of the available example datasets.
     """
     if v4:
-        return list(DATASET_KEYS_AND_OPEN_PATHS.keys())
-    return list(set(v.split("/")[0] for v in DATASET_KEYS_AND_OPEN_PATHS.values()))
+        return list(_DATASET_KEYS_AND_OPEN_PATHS.keys())
+    return list(set(v.split("/")[0] for v in _DATASET_KEYS_AND_OPEN_PATHS.values()))
 
 
 def download_example_dataset(dataset: str, data_home=None):
@@ -210,9 +210,9 @@ def download_example_dataset(dataset: str, data_home=None):
         Path to the folder containing the downloaded dataset files.
     """
     # Dev note: `dataset` is assumed to be a folder name with netcdf files
-    if dataset not in EXAMPLE_DATA_FILES:
+    if dataset not in _EXAMPLE_DATA_FILES:
         raise ValueError(
-            f"Dataset {dataset!r} not found. Available datasets are: " + ", ".join(EXAMPLE_DATA_FILES.keys())
+            f"Dataset {dataset!r} not found. Available datasets are: " + ", ".join(_EXAMPLE_DATA_FILES.keys())
         )
     odie = _get_pooch(data_home=data_home)
 
@@ -228,23 +228,23 @@ def download_example_dataset(dataset: str, data_home=None):
 
 
 # Just creating a temp folder to help during the migration
-TMP_ZARR_FOLDER = Path("../parcels-data/data-zarr")
+_TMP_ZARR_FOLDER = Path("../parcels-data/data-zarr")
 
 
 def open_dataset(name: str):
-    if name not in DATASET_KEYS_AND_OPEN_PATHS:
+    if name not in _DATASET_KEYS_AND_OPEN_PATHS:
         raise ValueError(
             f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_example_datasets(v4=True))
         )
 
     open_dataset_kwargs = dict(decode_timedelta=False, decode_cf=False)
     # assert not dataset.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix"
-    download_dataset_stem, rest = DATASET_KEYS_AND_OPEN_PATHS[name].split("/", maxsplit=1)
+    download_dataset_stem, rest = _DATASET_KEYS_AND_OPEN_PATHS[name].split("/", maxsplit=1)
     folder = download_example_dataset(download_dataset_stem)
 
     with xr.set_options(use_new_combine_kwarg_defaults=True):
         ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs)
-    path = TMP_ZARR_FOLDER / f"{name}.zip"
+    path = _TMP_ZARR_FOLDER / f"{name}.zip"
     path.parent.mkdir(exist_ok=True)
     if not path.exists():
         with zarr.storage.ZipStore(path, mode="w") as store:

From 970968133f35982c50a74b5bf3791004bb07da4e Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Thu, 9 Apr 2026 17:02:02 +0200
Subject: [PATCH 07/42] Make parcels.tutorial public

---
 src/parcels/__init__.py                   | 2 +-
 src/parcels/{_tutorial.py => tutorial.py} | 0
 tests/test_index_search.py                | 2 +-
 tests/test_tutorial.py                    | 2 +-
 4 files changed, 3 insertions(+), 3 deletions(-)
 rename src/parcels/{_tutorial.py => tutorial.py} (100%)

diff --git a/src/parcels/__init__.py b/src/parcels/__init__.py
index c13850a333..7d12abf5ed 100644
--- a/src/parcels/__init__.py
+++ b/src/parcels/__init__.py
@@ -38,7 +38,7 @@
     ParticleSetWarning,
 )
 from parcels._logger import logger
-from parcels._tutorial import download_example_dataset, list_example_datasets
+from parcels.tutorial import download_example_dataset, list_example_datasets
 
 __all__ = [  # noqa: RUF022
     # Core classes
diff --git a/src/parcels/_tutorial.py b/src/parcels/tutorial.py
similarity index 100%
rename from src/parcels/_tutorial.py
rename to src/parcels/tutorial.py
diff --git a/tests/test_index_search.py b/tests/test_index_search.py
index 6f20ba1e93..27e8ba04c2 100644
--- a/tests/test_index_search.py
+++ b/tests/test_index_search.py
@@ -6,8 +6,8 @@
 from parcels import Field, XGrid
 from parcels._core.index_search import _search_indices_curvilinear_2d
 from parcels._datasets.structured.generic import datasets
-from parcels._tutorial import download_example_dataset
 from parcels.interpolators import XLinear
+from parcels.tutorial import download_example_dataset
 
 
 @pytest.fixture
diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py
index f603899308..4ab1636c2b 100644
--- a/tests/test_tutorial.py
+++ b/tests/test_tutorial.py
@@ -2,7 +2,7 @@
 import requests
 import xarray as xr
 
-from parcels._tutorial import (
+from parcels.tutorial import (
     _get_pooch,
     download_example_dataset,
     list_example_datasets,

From e4247557e5ca8dcbc5ab21a4b87506be331c6053 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Thu, 9 Apr 2026 17:21:38 +0200
Subject: [PATCH 08/42] Remove `download_example_dataset` and
 `list_example_datasets` from root API

Users must now do `import parcels.tutorial`
---
 docs/getting_started/tutorial_output.ipynb                 | 2 +-
 docs/getting_started/tutorial_quickstart.md                | 3 ++-
 docs/user_guide/examples/explanation_kernelloop.md         | 3 ++-
 docs/user_guide/examples/tutorial_Argofloats.ipynb         | 3 ++-
 docs/user_guide/examples/tutorial_croco_3D.ipynb           | 3 ++-
 docs/user_guide/examples/tutorial_delaystart.ipynb         | 3 ++-
 docs/user_guide/examples/tutorial_diffusion.ipynb          | 2 +-
 docs/user_guide/examples/tutorial_dt_integrators.ipynb     | 3 ++-
 docs/user_guide/examples/tutorial_gsw_density.ipynb        | 3 ++-
 .../examples/tutorial_manipulating_field_data.ipynb        | 3 ++-
 docs/user_guide/examples/tutorial_mitgcm.ipynb             | 3 ++-
 docs/user_guide/examples/tutorial_nemo.ipynb               | 4 ++--
 docs/user_guide/examples/tutorial_sampling.ipynb           | 2 +-
 src/parcels/__init__.py                                    | 1 -
 tests/test_advection.py                                    | 7 ++++---
 tests/test_convert.py                                      | 7 ++++---
 tests/test_particlefile.py                                 | 4 ++--
 tests/test_sigmagrids.py                                   | 5 +++--
 tests/test_uxarray_fieldset.py                             | 4 ++--
 19 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/docs/getting_started/tutorial_output.ipynb b/docs/getting_started/tutorial_output.ipynb
index 485c3c0800..3b9eedec56 100644
--- a/docs/getting_started/tutorial_output.ipynb
+++ b/docs/getting_started/tutorial_output.ipynb
@@ -52,7 +52,7 @@
    "outputs": [],
    "source": [
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.download_example_dataset(\n",
+    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
     "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
     ")\n",
     "\n",
diff --git a/docs/getting_started/tutorial_quickstart.md b/docs/getting_started/tutorial_quickstart.md
index 60d5ced099..c806311e2c 100644
--- a/docs/getting_started/tutorial_quickstart.md
+++ b/docs/getting_started/tutorial_quickstart.md
@@ -20,6 +20,7 @@ and writing output files that can be read with xarray.
 import numpy as np
 import xarray as xr
 import parcels
+import parcels.tutorial
 ```
 
 ## Input flow fields: `FieldSet`
@@ -29,7 +30,7 @@ hydrodynamics fields in which the particles are tracked. Here we provide an exam
 [Global Ocean Physics Reanalysis](https://doi.org/10.48670/moi-00021) from the Copernicus Marine Service.
 
 ```{code-cell}
-example_dataset_folder = parcels.download_example_dataset(
+example_dataset_folder = parcels.tutorial.download_example_dataset(
     "CopernicusMarine_data_for_Argo_tutorial"
 )
 
diff --git a/docs/user_guide/examples/explanation_kernelloop.md b/docs/user_guide/examples/explanation_kernelloop.md
index bb2d20743b..582be17ed7 100644
--- a/docs/user_guide/examples/explanation_kernelloop.md
+++ b/docs/user_guide/examples/explanation_kernelloop.md
@@ -53,9 +53,10 @@ import numpy as np
 import xarray as xr
 
 import parcels
+import parcels.tutorial
 
 # Load the CopernicusMarine data in the Agulhas region from the example_datasets
-example_dataset_folder = parcels.download_example_dataset(
+example_dataset_folder = parcels.tutorial.download_example_dataset(
     "CopernicusMarine_data_for_Argo_tutorial"
 )
 
diff --git a/docs/user_guide/examples/tutorial_Argofloats.ipynb b/docs/user_guide/examples/tutorial_Argofloats.ipynb
index 0a37193ce7..d0b487a24b 100644
--- a/docs/user_guide/examples/tutorial_Argofloats.ipynb
+++ b/docs/user_guide/examples/tutorial_Argofloats.ipynb
@@ -110,9 +110,10 @@
     "import xarray as xr\n",
     "\n",
     "import parcels\n",
+    "import parcels.tutorial\n",
     "\n",
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.download_example_dataset(\n",
+    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
     "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
     ")\n",
     "\n",
diff --git a/docs/user_guide/examples/tutorial_croco_3D.ipynb b/docs/user_guide/examples/tutorial_croco_3D.ipynb
index a84b0cc742..515a5cde08 100644
--- a/docs/user_guide/examples/tutorial_croco_3D.ipynb
+++ b/docs/user_guide/examples/tutorial_croco_3D.ipynb
@@ -39,8 +39,9 @@
     "import xarray as xr\n",
     "\n",
     "import parcels\n",
+    "import parcels.tutorial\n",
     "\n",
-    "data_folder = parcels.download_example_dataset(\"CROCOidealized_data\")\n",
+    "data_folder = parcels.tutorial.download_example_dataset(\"CROCOidealized_data\")\n",
     "ds_fields = xr.open_dataset(data_folder / \"CROCO_idealized.nc\")\n",
     "\n",
     "ds_fields.load();  # Preload data to speed up access"
diff --git a/docs/user_guide/examples/tutorial_delaystart.ipynb b/docs/user_guide/examples/tutorial_delaystart.ipynb
index 03bdb6a589..5ca66f4cd9 100644
--- a/docs/user_guide/examples/tutorial_delaystart.ipynb
+++ b/docs/user_guide/examples/tutorial_delaystart.ipynb
@@ -30,6 +30,7 @@
     "from matplotlib.animation import FuncAnimation\n",
     "\n",
     "import parcels\n",
+    "import parcels.tutorial\n",
     "\n",
     "# for interactive display of animations\n",
     "plt.rcParams[\"animation.html\"] = \"jshtml\""
@@ -50,7 +51,7 @@
    "outputs": [],
    "source": [
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.download_example_dataset(\n",
+    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
     "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
     ")\n",
     "\n",
diff --git a/docs/user_guide/examples/tutorial_diffusion.ipynb b/docs/user_guide/examples/tutorial_diffusion.ipynb
index 6e42b4ce91..992dcdfa55 100644
--- a/docs/user_guide/examples/tutorial_diffusion.ipynb
+++ b/docs/user_guide/examples/tutorial_diffusion.ipynb
@@ -467,7 +467,7 @@
    "outputs": [],
    "source": [
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.download_example_dataset(\n",
+    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
     "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
     ")\n",
     "\n",
diff --git a/docs/user_guide/examples/tutorial_dt_integrators.ipynb b/docs/user_guide/examples/tutorial_dt_integrators.ipynb
index 2ecf7f81af..3ac6be4e13 100644
--- a/docs/user_guide/examples/tutorial_dt_integrators.ipynb
+++ b/docs/user_guide/examples/tutorial_dt_integrators.ipynb
@@ -59,9 +59,10 @@
     "import xarray as xr\n",
     "\n",
     "import parcels\n",
+    "import parcels.tutorial\n",
     "\n",
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.download_example_dataset(\n",
+    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
     "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
     ")\n",
     "\n",
diff --git a/docs/user_guide/examples/tutorial_gsw_density.ipynb b/docs/user_guide/examples/tutorial_gsw_density.ipynb
index 44b5928a7a..22bbba4ed3 100644
--- a/docs/user_guide/examples/tutorial_gsw_density.ipynb
+++ b/docs/user_guide/examples/tutorial_gsw_density.ipynb
@@ -36,9 +36,10 @@
     "import xarray as xr\n",
     "\n",
     "import parcels\n",
+    "import parcels.tutorial\n",
     "\n",
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.download_example_dataset(\n",
+    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
     "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
     ")\n",
     "\n",
diff --git a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb
index f5dc55571b..dc17a0d6d0 100644
--- a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb
+++ b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb
@@ -43,9 +43,10 @@
     "import xarray as xr\n",
     "\n",
     "import parcels\n",
+    "import parcels.tutorial\n",
     "\n",
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.download_example_dataset(\n",
+    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
     "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
     ")\n",
     "\n",
diff --git a/docs/user_guide/examples/tutorial_mitgcm.ipynb b/docs/user_guide/examples/tutorial_mitgcm.ipynb
index e94a0ea885..845d15e361 100644
--- a/docs/user_guide/examples/tutorial_mitgcm.ipynb
+++ b/docs/user_guide/examples/tutorial_mitgcm.ipynb
@@ -28,8 +28,9 @@
     "import xarray as xr\n",
     "\n",
     "import parcels\n",
+    "import parcels.tutorial\n",
     "\n",
-    "data_folder = parcels.download_example_dataset(\"MITgcm_example_data\")\n",
+    "data_folder = parcels.tutorial.download_example_dataset(\"MITgcm_example_data\")\n",
     "ds_fields = xr.open_dataset(data_folder / \"mitgcm_UV_surface_zonally_reentrant.nc\")"
    ]
   },
diff --git a/docs/user_guide/examples/tutorial_nemo.ipynb b/docs/user_guide/examples/tutorial_nemo.ipynb
index fd2f50ab6c..5f27d2af65 100644
--- a/docs/user_guide/examples/tutorial_nemo.ipynb
+++ b/docs/user_guide/examples/tutorial_nemo.ipynb
@@ -66,7 +66,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "data_folder = parcels.download_example_dataset(\"NemoCurvilinear_data\")\n",
+    "data_folder = parcels.tutorial.download_example_dataset(\"NemoCurvilinear_data\")\n",
     "ds_fields = xr.open_mfdataset(\n",
     "    data_folder.glob(\"*.nc4\"),\n",
     "    data_vars=\"minimal\",\n",
@@ -291,7 +291,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "data_folder = parcels.download_example_dataset(\"NemoNorthSeaORCA025-N006_data\")\n",
+    "data_folder = parcels.tutorial.download_example_dataset(\"NemoNorthSeaORCA025-N006_data\")\n",
     "ds_fields = xr.open_mfdataset(\n",
     "    data_folder.glob(\"ORCA*.nc\"),\n",
     "    data_vars=\"minimal\",\n",
diff --git a/docs/user_guide/examples/tutorial_sampling.ipynb b/docs/user_guide/examples/tutorial_sampling.ipynb
index 27ff5af9de..1867a5b1ca 100644
--- a/docs/user_guide/examples/tutorial_sampling.ipynb
+++ b/docs/user_guide/examples/tutorial_sampling.ipynb
@@ -59,7 +59,7 @@
    "outputs": [],
    "source": [
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.download_example_dataset(\n",
+    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
     "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
     ")\n",
     "\n",
diff --git a/src/parcels/__init__.py b/src/parcels/__init__.py
index 7d12abf5ed..5330b6212a 100644
--- a/src/parcels/__init__.py
+++ b/src/parcels/__init__.py
@@ -38,7 +38,6 @@
     ParticleSetWarning,
 )
 from parcels._logger import logger
-from parcels.tutorial import download_example_dataset, list_example_datasets
 
 __all__ = [  # noqa: RUF022
     # Core classes
diff --git a/tests/test_advection.py b/tests/test_advection.py
index c5d6a9ebf4..4898838fa9 100644
--- a/tests/test_advection.py
+++ b/tests/test_advection.py
@@ -3,6 +3,7 @@
 import xarray as xr
 
 import parcels
+import parcels.tutorial
 from parcels import (
     Field,
     FieldSet,
@@ -455,7 +456,7 @@ def UpdateP(particles, fieldset):  # pragma: no cover
 
 
 def test_nemo_curvilinear_fieldset():
-    data_folder = parcels.download_example_dataset("NemoCurvilinear_data")
+    data_folder = parcels.tutorial.download_example_dataset("NemoCurvilinear_data")
     U = xr.open_mfdataset(data_folder.glob("*U.nc4"))
     V = xr.open_mfdataset(data_folder.glob("*V.nc4"))
     coords = xr.open_dataset(data_folder / "mesh_mask.nc4")
@@ -476,7 +477,7 @@ def test_nemo_curvilinear_fieldset():
 
 @pytest.mark.parametrize("kernel", [AdvectionRK4, AdvectionRK4_3D])
 def test_nemo_3D_curvilinear_fieldset(kernel):
-    data_folder = parcels.download_example_dataset("NemoNorthSeaORCA025-N006_data")
+    data_folder = parcels.tutorial.download_example_dataset("NemoNorthSeaORCA025-N006_data")
     U = xr.open_mfdataset(data_folder.glob("*U.nc"))
     V = xr.open_mfdataset(data_folder.glob("*V.nc"))
     W = xr.open_mfdataset(data_folder.glob("*W.nc"))
@@ -505,7 +506,7 @@ def test_nemo_3D_curvilinear_fieldset(kernel):
 
 
 def test_mitgcm():
-    data_folder = parcels.download_example_dataset("MITgcm_example_data")
+    data_folder = parcels.tutorial.download_example_dataset("MITgcm_example_data")
     ds_fields = xr.open_dataset(data_folder / "mitgcm_UV_surface_zonally_reentrant.nc")
 
     ds_fset = convert.mitgcm_to_sgrid(fields={"U": ds_fields.UVEL, "V": ds_fields.VVEL}, coords=ds_fields)
diff --git a/tests/test_convert.py b/tests/test_convert.py
index 15bac3cfbf..17824c7718 100644
--- a/tests/test_convert.py
+++ b/tests/test_convert.py
@@ -3,6 +3,7 @@
 
 import parcels
 import parcels.convert as convert
+import parcels.tutorial
 from parcels import FieldSet
 from parcels._core.utils import sgrid
 from parcels._datasets.structured.circulation_models import datasets as datasets_circulation_models
@@ -10,7 +11,7 @@
 
 
 def test_nemo_to_sgrid():
-    data_folder = parcels.download_example_dataset("NemoCurvilinear_data")
+    data_folder = parcels.tutorial.download_example_dataset("NemoCurvilinear_data")
     U = xr.open_mfdataset(data_folder.glob("*U.nc4"))
     V = xr.open_mfdataset(data_folder.glob("*V.nc4"))
     coords = xr.open_dataset(data_folder / "mesh_mask.nc4")
@@ -41,7 +42,7 @@ def test_nemo_to_sgrid():
 
 
 def test_convert_nemo_offsets():
-    data_folder = parcels.download_example_dataset("NemoCurvilinear_data")
+    data_folder = parcels.tutorial.download_example_dataset("NemoCurvilinear_data")
     U = xr.open_mfdataset(data_folder.glob("*U.nc4"))
     V = xr.open_mfdataset(data_folder.glob("*V.nc4"))
     coords = xr.open_dataset(data_folder / "mesh_mask.nc4")
@@ -56,7 +57,7 @@ def test_convert_nemo_offsets():
 
 
 def test_convert_mitgcm_offsets():
-    data_folder = parcels.download_example_dataset("MITgcm_example_data")
+    data_folder = parcels.tutorial.download_example_dataset("MITgcm_example_data")
     ds_fields = xr.open_dataset(data_folder / "mitgcm_UV_surface_zonally_reentrant.nc")
     coords = ds_fields[["XG", "YG", "Zl", "time"]]
     ds_fset = convert.mitgcm_to_sgrid(fields={"U": ds_fields.UVEL, "V": ds_fields.VVEL}, coords=coords)
diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py
index d642a544c7..43b4afc03a 100755
--- a/tests/test_particlefile.py
+++ b/tests/test_particlefile.py
@@ -8,6 +8,7 @@
 import xarray as xr
 from zarr.storage import MemoryStore
 
+import parcels.tutorial
 from parcels import (
     Field,
     FieldSet,
@@ -18,7 +19,6 @@
     Variable,
     VectorField,
     XGrid,
-    download_example_dataset,
 )
 from parcels._core.particle import Particle, create_particle_data, get_default_particle
 from parcels._core.utils.time import TimeInterval, timedelta_to_float
@@ -442,7 +442,7 @@ def test_pset_execute_outputdt_backwards_fieldset_timevarying():
     dt = -timedelta(minutes=5)
 
     # TODO: Not ideal using the `download_example_dataset` here, but I'm struggling to recreate this error using the test suite fieldsets we have
-    example_dataset_folder = download_example_dataset("CopernicusMarine_data_for_Argo_tutorial")
+    example_dataset_folder = parcels.tutorial.download_example_dataset("CopernicusMarine_data_for_Argo_tutorial")
     ds_in = xr.open_mfdataset(f"{example_dataset_folder}/*.nc", combine="by_coords")
     fields = {"U": ds_in["uo"], "V": ds_in["vo"]}
     ds_fset = copernicusmarine_to_sgrid(fields=fields)
diff --git a/tests/test_sigmagrids.py b/tests/test_sigmagrids.py
index de437c8fba..afedf9de1f 100644
--- a/tests/test_sigmagrids.py
+++ b/tests/test_sigmagrids.py
@@ -2,6 +2,7 @@
 import xarray as xr
 
 import parcels
+import parcels.tutorial
 from parcels import Particle, ParticleSet, Variable
 from parcels.kernels import AdvectionRK4_3D_CROCO, SampleOmegaCroco, convert_z_to_sigma_croco
 
@@ -17,7 +18,7 @@ def test_conversion_3DCROCO():
     lat, lon = ds.y_rho.values[y, x], ds.x_rho.values[y, x]
     ```
     """
-    data_folder = parcels.download_example_dataset("CROCOidealized_data")
+    data_folder = parcels.tutorial.download_example_dataset("CROCOidealized_data")
     ds_fields = xr.open_dataset(data_folder / "CROCO_idealized.nc")
     fields = {
         "U": ds_fields["u"],
@@ -46,7 +47,7 @@ def test_conversion_3DCROCO():
 
 
 def test_advection_3DCROCO():
-    data_folder = parcels.download_example_dataset("CROCOidealized_data")
+    data_folder = parcels.tutorial.download_example_dataset("CROCOidealized_data")
     ds_fields = xr.open_dataset(data_folder / "CROCO_idealized.nc")
     ds_fields.load()
 
diff --git a/tests/test_uxarray_fieldset.py b/tests/test_uxarray_fieldset.py
index 001c411c6c..d4a11f550b 100644
--- a/tests/test_uxarray_fieldset.py
+++ b/tests/test_uxarray_fieldset.py
@@ -2,6 +2,7 @@
 import pytest
 import uxarray as ux
 
+import parcels.tutorial
 from parcels import (
     Field,
     FieldSet,
@@ -9,7 +10,6 @@
     ParticleSet,
     UxGrid,
     VectorField,
-    download_example_dataset,
 )
 from parcels._datasets.unstructured.generic import datasets as datasets_unstructured
 from parcels.convert import fesom_to_ugrid, icon_to_ugrid
@@ -22,7 +22,7 @@
 
 @pytest.fixture
 def ds_fesom_channel() -> ux.UxDataset:
-    fesom_path = download_example_dataset("FESOM_periodic_channel")
+    fesom_path = parcels.tutorial.download_example_dataset("FESOM_periodic_channel")
     grid_path = f"{fesom_path}/fesom_channel.nc"
     data_path = [
         f"{fesom_path}/u.fesom_channel.nc",

From b97918da2a1ab07a8bf43b99da4be29e7305f5f2 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Fri, 10 Apr 2026 12:09:16 +0200
Subject: [PATCH 09/42] debug: Add code_path argument to open_dataset

---
 src/parcels/tutorial.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index 56ff0531bd..2a573f667a 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -1,6 +1,7 @@
 import os
 from datetime import datetime, timedelta
 from pathlib import Path
+from typing import Literal
 
 import pooch
 import xarray as xr
@@ -231,7 +232,7 @@ def download_example_dataset(dataset: str, data_home=None):
 _TMP_ZARR_FOLDER = Path("../parcels-data/data-zarr")
 
 
-def open_dataset(name: str):
+def open_dataset(name: str, code_path: Literal["nc", "zarr"] = "nc"):  # TODO: Remove code_path arg
     if name not in _DATASET_KEYS_AND_OPEN_PATHS:
         raise ValueError(
             f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_example_datasets(v4=True))
@@ -244,6 +245,8 @@ def open_dataset(name: str):
 
     with xr.set_options(use_new_combine_kwarg_defaults=True):
         ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs)
+    if code_path == "nc":
+        return ds
     path = _TMP_ZARR_FOLDER / f"{name}.zip"
     path.parent.mkdir(exist_ok=True)
     if not path.exists():

From 11daa887fccc6682f3ae6287c01e1c775d644a39 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Fri, 10 Apr 2026 14:09:15 +0200
Subject: [PATCH 10/42] Update docs and tests to use open_dataset

---
 tests/test_advection.py    | 19 ++++++++-----------
 tests/test_convert.py      | 17 +++++++----------
 tests/test_index_search.py |  8 ++------
 tests/test_particlefile.py |  5 ++---
 tests/test_sigmagrids.py   |  7 ++-----
 5 files changed, 21 insertions(+), 35 deletions(-)

diff --git a/tests/test_advection.py b/tests/test_advection.py
index 4898838fa9..05450a4a93 100644
--- a/tests/test_advection.py
+++ b/tests/test_advection.py
@@ -456,10 +456,9 @@ def UpdateP(particles, fieldset):  # pragma: no cover
 
 
 def test_nemo_curvilinear_fieldset():
-    data_folder = parcels.tutorial.download_example_dataset("NemoCurvilinear_data")
-    U = xr.open_mfdataset(data_folder.glob("*U.nc4"))
-    V = xr.open_mfdataset(data_folder.glob("*V.nc4"))
-    coords = xr.open_dataset(data_folder / "mesh_mask.nc4")
+    U = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/U")
+    V = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/V")
+    coords = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask")
 
     ds = parcels.convert.nemo_to_sgrid(fields=dict(U=U, V=V), coords=coords)
 
@@ -477,11 +476,10 @@ def test_nemo_curvilinear_fieldset():
 
 @pytest.mark.parametrize("kernel", [AdvectionRK4, AdvectionRK4_3D])
 def test_nemo_3D_curvilinear_fieldset(kernel):
-    data_folder = parcels.tutorial.download_example_dataset("NemoNorthSeaORCA025-N006_data")
-    U = xr.open_mfdataset(data_folder.glob("*U.nc"))
-    V = xr.open_mfdataset(data_folder.glob("*V.nc"))
-    W = xr.open_mfdataset(data_folder.glob("*W.nc"))
-    coords = xr.open_dataset(data_folder / "coordinates.nc", decode_times=False)
+    U = parcels.tutorial.open_dataset("NemoNorthSeaORCA025-N006_data/U")
+    V = parcels.tutorial.open_dataset("NemoNorthSeaORCA025-N006_data/V")
+    W = parcels.tutorial.open_dataset("NemoNorthSeaORCA025-N006_data/W")
+    coords = parcels.tutorial.open_dataset("NemoNorthSeaORCA025-N006_data/mesh_mask")
 
     ds = parcels.convert.nemo_to_sgrid(fields=dict(U=U["uo"], V=V["vo"], W=W["wo"]), coords=coords)
 
@@ -506,8 +504,7 @@ def test_nemo_3D_curvilinear_fieldset(kernel):
 
 
 def test_mitgcm():
-    data_folder = parcels.tutorial.download_example_dataset("MITgcm_example_data")
-    ds_fields = xr.open_dataset(data_folder / "mitgcm_UV_surface_zonally_reentrant.nc")
+    ds_fields = parcels.tutorial.open_dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant")
 
     ds_fset = convert.mitgcm_to_sgrid(fields={"U": ds_fields.UVEL, "V": ds_fields.VVEL}, coords=ds_fields)
     fieldset = FieldSet.from_sgrid_conventions(ds_fset)
diff --git a/tests/test_convert.py b/tests/test_convert.py
index 17824c7718..b286bb2689 100644
--- a/tests/test_convert.py
+++ b/tests/test_convert.py
@@ -11,10 +11,9 @@
 
 
 def test_nemo_to_sgrid():
-    data_folder = parcels.tutorial.download_example_dataset("NemoCurvilinear_data")
-    U = xr.open_mfdataset(data_folder.glob("*U.nc4"))
-    V = xr.open_mfdataset(data_folder.glob("*V.nc4"))
-    coords = xr.open_dataset(data_folder / "mesh_mask.nc4")
+    U = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/U")
+    V = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/V")
+    coords = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask")
 
     ds = convert.nemo_to_sgrid(fields=dict(U=U, V=V), coords=coords)
 
@@ -42,10 +41,9 @@ def test_nemo_to_sgrid():
 
 
 def test_convert_nemo_offsets():
-    data_folder = parcels.tutorial.download_example_dataset("NemoCurvilinear_data")
-    U = xr.open_mfdataset(data_folder.glob("*U.nc4"))
-    V = xr.open_mfdataset(data_folder.glob("*V.nc4"))
-    coords = xr.open_dataset(data_folder / "mesh_mask.nc4")
+    U = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/U")
+    V = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/V")
+    coords = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask")
 
     ds = convert.nemo_to_sgrid(fields=dict(U=U, V=V), coords=coords)
     fieldset = FieldSet.from_sgrid_conventions(ds)
@@ -57,8 +55,7 @@ def test_convert_nemo_offsets():
 
 
 def test_convert_mitgcm_offsets():
-    data_folder = parcels.tutorial.download_example_dataset("MITgcm_example_data")
-    ds_fields = xr.open_dataset(data_folder / "mitgcm_UV_surface_zonally_reentrant.nc")
+    ds_fields = parcels.tutorial.open_dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant")
     coords = ds_fields[["XG", "YG", "Zl", "time"]]
     ds_fset = convert.mitgcm_to_sgrid(fields={"U": ds_fields.UVEL, "V": ds_fields.VVEL}, coords=coords)
     fieldset = FieldSet.from_sgrid_conventions(ds_fset)
diff --git a/tests/test_index_search.py b/tests/test_index_search.py
index 27e8ba04c2..7d6eb45bac 100644
--- a/tests/test_index_search.py
+++ b/tests/test_index_search.py
@@ -1,13 +1,12 @@
 import numpy as np
 import pytest
-import xarray as xr
 import xgcm
 
+import parcels.tutorial
 from parcels import Field, XGrid
 from parcels._core.index_search import _search_indices_curvilinear_2d
 from parcels._datasets.structured.generic import datasets
 from parcels.interpolators import XLinear
-from parcels.tutorial import download_example_dataset
 
 
 @pytest.fixture
@@ -56,10 +55,7 @@ def test_grid_indexing_fpoints(field_cone):
 
 
 def test_indexing_nemo_curvilinear():
-    data_folder = download_example_dataset("NemoCurvilinear_data")
-    ds = xr.open_mfdataset(
-        data_folder.glob("*.nc4"), combine="nested", data_vars="minimal", coords="minimal", compat="override"
-    )
+    ds = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask")
     ds = ds.isel({"time_counter": 0, "time": 0, "z_a": 0}, drop=True).rename(
         {"glamf": "lon", "gphif": "lat", "z": "depth"}
     )
diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py
index 43b4afc03a..ef4d1b565b 100755
--- a/tests/test_particlefile.py
+++ b/tests/test_particlefile.py
@@ -441,9 +441,8 @@ def test_pset_execute_outputdt_backwards_fieldset_timevarying():
     runtime = timedelta(days=2)
     dt = -timedelta(minutes=5)
 
-    # TODO: Not ideal using the `download_example_dataset` here, but I'm struggling to recreate this error using the test suite fieldsets we have
-    example_dataset_folder = parcels.tutorial.download_example_dataset("CopernicusMarine_data_for_Argo_tutorial")
-    ds_in = xr.open_mfdataset(f"{example_dataset_folder}/*.nc", combine="by_coords")
+    # TODO: Not ideal using the `open_dataset` here, but I'm struggling to recreate this error using the test suite fieldsets we have
+    ds_in = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc")
     fields = {"U": ds_in["uo"], "V": ds_in["vo"]}
     ds_fset = copernicusmarine_to_sgrid(fields=fields)
     fieldset = FieldSet.from_sgrid_conventions(ds_fset)
diff --git a/tests/test_sigmagrids.py b/tests/test_sigmagrids.py
index afedf9de1f..537c8c101d 100644
--- a/tests/test_sigmagrids.py
+++ b/tests/test_sigmagrids.py
@@ -1,5 +1,4 @@
 import numpy as np
-import xarray as xr
 
 import parcels
 import parcels.tutorial
@@ -18,8 +17,7 @@ def test_conversion_3DCROCO():
     lat, lon = ds.y_rho.values[y, x], ds.x_rho.values[y, x]
     ```
     """
-    data_folder = parcels.tutorial.download_example_dataset("CROCOidealized_data")
-    ds_fields = xr.open_dataset(data_folder / "CROCO_idealized.nc")
+    ds_fields = parcels.tutorial.open_dataset("CROCOidealized_data/data")
     fields = {
         "U": ds_fields["u"],
         "V": ds_fields["v"],
@@ -47,8 +45,7 @@ def test_conversion_3DCROCO():
 
 
 def test_advection_3DCROCO():
-    data_folder = parcels.tutorial.download_example_dataset("CROCOidealized_data")
-    ds_fields = xr.open_dataset(data_folder / "CROCO_idealized.nc")
+    ds_fields = parcels.tutorial.open_dataset("CROCOidealized_data/data")
     ds_fields.load()
 
     fields = {

From 332fcab9f4123cbdbcea4e846c582a2c7b32ebdf Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Fri, 10 Apr 2026 14:27:32 +0200
Subject: [PATCH 11/42] Refactor _DATASET_KEYS_AND_OPEN_PATHS to
 _DATASET_KEYS_AND_CONFIGS

---
 src/parcels/tutorial.py | 93 +++++++++++++++++++++++++----------------
 1 file changed, 57 insertions(+), 36 deletions(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index 2a573f667a..cc78e2e254 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -1,4 +1,6 @@
 import os
+from collections.abc import Callable
+from dataclasses import dataclass
 from datetime import datetime, timedelta
 from pathlib import Path
 from typing import Literal
@@ -108,41 +110,50 @@
     "CROCOidealized_data": ["CROCO_idealized.nc"],
 }
 
+
+@dataclass
+class DatasetNCtoZarrConfig:
+    path_relative_to_root: str
+
+    # Function to apply to the dataset before decoding the CF variables
+    pre_decode_cf_callable: None | Callable[[xr.Dataset], xr.Dataset] = None
+
+
 # The first here is a human readable key, the latter the path to load the netcdf data
 # (after refactor the latter open path will disappear, and will just be `open_zarr(f'{ds_key}.zip')`)
 # fmt: off
-_DATASET_KEYS_AND_OPEN_PATHS: dict[str, str] = dict([
-    ("MovingEddies_data/P", "MovingEddies_data/moving_eddiesP.nc"),
-    ("MovingEddies_data/U", "MovingEddies_data/moving_eddiesU.nc"),
-    ("MovingEddies_data/V", "MovingEddies_data/moving_eddiesV.nc"),
-    ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", "MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"),
-    ("OFAM_example_data/U", "OFAM_example_data/OFAM_simple_U.nc"),
-    ("OFAM_example_data/V", "OFAM_example_data/OFAM_simple_V.nc"),
-    ("Peninsula_data/U", "Peninsula_data/peninsulaU.nc"),
-    ("Peninsula_data/V", "Peninsula_data/peninsulaV.nc"),
-    ("Peninsula_data/P", "Peninsula_data/peninsulaP.nc"),
-    ("Peninsula_data/T", "Peninsula_data/peninsulaT.nc"),
-    ("GlobCurrent_example_data/data.nc", "GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc"),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"),
-    ("DecayingMovingEddy_data/U", "DecayingMovingEddy_data/decaying_moving_eddyU.nc"),
-    ("DecayingMovingEddy_data/V", "DecayingMovingEddy_data/decaying_moving_eddyV.nc"),
-    ("FESOM_periodic_channel/fesom_channel", "FESOM_periodic_channel/fesom_channel.nc"),
-    ("FESOM_periodic_channel/u.fesom_channel", "FESOM_periodic_channel/u.fesom_channel.nc"),
-    ("FESOM_periodic_channel/v.fesom_channel", "FESOM_periodic_channel/v.fesom_channel.nc"),
-    ("FESOM_periodic_channel/w.fesom_channel", "FESOM_periodic_channel/w.fesom_channel.nc"),
-    ("NemoCurvilinear_data_zonal/U", "NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4"),
-    ("NemoCurvilinear_data_zonal/V", "NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4"),
-    ("NemoCurvilinear_data_zonal/mesh_mask", "NemoCurvilinear_data/mesh_mask.nc4"),
-    ("NemoNorthSeaORCA025-N006_data/U", "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc"),
-    ("NemoNorthSeaORCA025-N006_data/V", "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"),
-    ("NemoNorthSeaORCA025-N006_data/W", "NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"),
-    ("NemoNorthSeaORCA025-N006_data/mesh_mask", "NemoNorthSeaORCA025-N006_data/coordinates.nc"),
+_DATASET_KEYS_AND_CONFIGS: dict[str, DatasetNCtoZarrConfig] = dict([
+    ("MovingEddies_data/P", DatasetNCtoZarrConfig("MovingEddies_data/moving_eddiesP.nc")),
+    ("MovingEddies_data/U", DatasetNCtoZarrConfig("MovingEddies_data/moving_eddiesU.nc")),
+    ("MovingEddies_data/V", DatasetNCtoZarrConfig("MovingEddies_data/moving_eddiesV.nc")),
+    ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", DatasetNCtoZarrConfig("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc")),
+    ("OFAM_example_data/U", DatasetNCtoZarrConfig("OFAM_example_data/OFAM_simple_U.nc")),
+    ("OFAM_example_data/V", DatasetNCtoZarrConfig("OFAM_example_data/OFAM_simple_V.nc")),
+    ("Peninsula_data/U", DatasetNCtoZarrConfig("Peninsula_data/peninsulaU.nc")),
+    ("Peninsula_data/V", DatasetNCtoZarrConfig("Peninsula_data/peninsulaV.nc")),
+    ("Peninsula_data/P", DatasetNCtoZarrConfig("Peninsula_data/peninsulaP.nc")),
+    ("Peninsula_data/T", DatasetNCtoZarrConfig("Peninsula_data/peninsulaT.nc")),
+    ("GlobCurrent_example_data/data.nc", DatasetNCtoZarrConfig("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc")),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", DatasetNCtoZarrConfig("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", DatasetNCtoZarrConfig("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", DatasetNCtoZarrConfig("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
+    ("DecayingMovingEddy_data/U", DatasetNCtoZarrConfig("DecayingMovingEddy_data/decaying_moving_eddyU.nc")),
+    ("DecayingMovingEddy_data/V", DatasetNCtoZarrConfig("DecayingMovingEddy_data/decaying_moving_eddyV.nc")),
+    ("FESOM_periodic_channel/fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/fesom_channel.nc")),
+    ("FESOM_periodic_channel/u.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/u.fesom_channel.nc")),
+    ("FESOM_periodic_channel/v.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/v.fesom_channel.nc")),
+    ("FESOM_periodic_channel/w.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/w.fesom_channel.nc")),
+    ("NemoCurvilinear_data_zonal/U", DatasetNCtoZarrConfig("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4")),
+    ("NemoCurvilinear_data_zonal/V", DatasetNCtoZarrConfig("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4")),
+    ("NemoCurvilinear_data_zonal/mesh_mask", DatasetNCtoZarrConfig("NemoCurvilinear_data/mesh_mask.nc4")),
+    ("NemoNorthSeaORCA025-N006_data/U", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc")),
+    ("NemoNorthSeaORCA025-N006_data/V", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc")),
+    ("NemoNorthSeaORCA025-N006_data/W", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc")),
+    ("NemoNorthSeaORCA025-N006_data/mesh_mask", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/coordinates.nc")),
     # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973
-    ("SWASH_data/data", "SWASH_data/field_00655*.nc"),
-    ("WOA_data/data", "WOA_data/woa18_decav_t*_04.nc"),
-    ("CROCOidealized_data/data", "CROCOidealized_data/CROCO_idealized.nc"),
+    ("SWASH_data/data", DatasetNCtoZarrConfig("SWASH_data/field_00655*.nc")),
+    ("WOA_data/data", DatasetNCtoZarrConfig("WOA_data/woa18_decav_t*_04.nc")),
+    ("CROCOidealized_data/data", DatasetNCtoZarrConfig("CROCOidealized_data/CROCO_idealized.nc")),
 ])
 # fmt: on
 
@@ -186,8 +197,8 @@ def list_example_datasets(v4=False) -> list[str]:  # TODO: Remove v4 flag when m
         The names of the available example datasets.
     """
     if v4:
-        return list(_DATASET_KEYS_AND_OPEN_PATHS.keys())
-    return list(set(v.split("/")[0] for v in _DATASET_KEYS_AND_OPEN_PATHS.values()))
+        return list(_DATASET_KEYS_AND_CONFIGS.keys())
+    return list(set(v.path_relative_to_root.split("/")[0] for v in _DATASET_KEYS_AND_CONFIGS.values()))
 
 
 def download_example_dataset(dataset: str, data_home=None):
@@ -233,18 +244,28 @@ def download_example_dataset(dataset: str, data_home=None):
 
 
 def open_dataset(name: str, code_path: Literal["nc", "zarr"] = "nc"):  # TODO: Remove code_path arg
-    if name not in _DATASET_KEYS_AND_OPEN_PATHS:
+    try:
+        cfg = _DATASET_KEYS_AND_CONFIGS[name]
+    except KeyError as e:
         raise ValueError(
             f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_example_datasets(v4=True))
-        )
+        ) from e
 
     open_dataset_kwargs = dict(decode_timedelta=False, decode_cf=False)
+    open_dataset_kwargs = dict(decode_cf=False)
     # assert not dataset.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix"
-    download_dataset_stem, rest = _DATASET_KEYS_AND_OPEN_PATHS[name].split("/", maxsplit=1)
+    download_dataset_stem, rest = cfg.path_relative_to_root.split("/", maxsplit=1)
     folder = download_example_dataset(download_dataset_stem)
 
     with xr.set_options(use_new_combine_kwarg_defaults=True):
+        # return f"{folder}/{rest}"
         ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs)
+
+    if cfg.pre_decode_cf_callable is not None:
+        ds = cfg.pre_decode_cf_callable(ds)
+
+    ds = xr.decode_cf(ds)
+
     if code_path == "nc":
         return ds
     path = _TMP_ZARR_FOLDER / f"{name}.zip"

From f8df2386d0aa73f172d11aec41e8f3dabdbcd025 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Fri, 10 Apr 2026 15:26:28 +0200
Subject: [PATCH 12/42] Fix dataset ingestion

---
 src/parcels/tutorial.py    | 23 ++++++++++++++++++++---
 tests/test_index_search.py |  4 +---
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index cc78e2e254..e5800d1851 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -119,6 +119,23 @@ class DatasetNCtoZarrConfig:
     pre_decode_cf_callable: None | Callable[[xr.Dataset], xr.Dataset] = None
 
 
+def _preprocess_drop_time_from_mesh1(ds: xr.Dataset) -> xr.Dataset:
+    # For some reason the mesh "NemoNorthSeaORCA025-N006_data/coordinates.nc" has time dimensions, and these also have broken cf-time metadata.
+    # Fix that by selecting the first time index and dropping the "time" and "time_steps" variables (drop_vars replaces the deprecated Dataset.drop).
+    return ds.isel(time=0).drop_vars(["time", "time_steps"])
+
+
+def _preprocess_drop_time_from_mesh2(ds: xr.Dataset) -> xr.Dataset:
+    # For some reason the mesh "NemoCurvilinear_data_zonal/mesh_mask" has a time dimension; select its first index and drop the "time" variable.
+    return ds.isel(time=0).drop_vars(["time"])
+
+
+def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset:
+    # "WOA_data/woa18_decav_t*_04.nc" looks to be simulation data using CF time (i.e., months of 30 days), but the calendar attribute isn't set, so set it here (this updates ds in place).
+    ds.time.attrs.update({"calendar": "360_day"})
+    return ds
+
+
 # The first here is a human readable key, the latter the path to load the netcdf data
 # (after refactor the latter open path will disappear, and will just be `open_zarr(f'{ds_key}.zip')`)
 # fmt: off
@@ -145,14 +162,14 @@ class DatasetNCtoZarrConfig:
     ("FESOM_periodic_channel/w.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/w.fesom_channel.nc")),
     ("NemoCurvilinear_data_zonal/U", DatasetNCtoZarrConfig("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4")),
     ("NemoCurvilinear_data_zonal/V", DatasetNCtoZarrConfig("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4")),
-    ("NemoCurvilinear_data_zonal/mesh_mask", DatasetNCtoZarrConfig("NemoCurvilinear_data/mesh_mask.nc4")),
+    ("NemoCurvilinear_data_zonal/mesh_mask", DatasetNCtoZarrConfig("NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2)),
     ("NemoNorthSeaORCA025-N006_data/U", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc")),
     ("NemoNorthSeaORCA025-N006_data/V", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc")),
     ("NemoNorthSeaORCA025-N006_data/W", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc")),
-    ("NemoNorthSeaORCA025-N006_data/mesh_mask", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/coordinates.nc")),
+    ("NemoNorthSeaORCA025-N006_data/mesh_mask", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1)),
     # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973
     ("SWASH_data/data", DatasetNCtoZarrConfig("SWASH_data/field_00655*.nc")),
-    ("WOA_data/data", DatasetNCtoZarrConfig("WOA_data/woa18_decav_t*_04.nc")),
+    ("WOA_data/data", DatasetNCtoZarrConfig("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day)),
     ("CROCOidealized_data/data", DatasetNCtoZarrConfig("CROCOidealized_data/CROCO_idealized.nc")),
 ])
 # fmt: on
diff --git a/tests/test_index_search.py b/tests/test_index_search.py
index 7d6eb45bac..bb7ec3f3b1 100644
--- a/tests/test_index_search.py
+++ b/tests/test_index_search.py
@@ -56,9 +56,7 @@ def test_grid_indexing_fpoints(field_cone):
 
 def test_indexing_nemo_curvilinear():
     ds = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask")
-    ds = ds.isel({"time_counter": 0, "time": 0, "z_a": 0}, drop=True).rename(
-        {"glamf": "lon", "gphif": "lat", "z": "depth"}
-    )
+    ds = ds.isel({"z_a": 0}, drop=True).rename({"glamf": "lon", "gphif": "lat", "z": "depth"})
     xgcm_grid = xgcm.Grid(ds, coords={"X": {"left": "x"}, "Y": {"left": "y"}}, periodic=False, autoparse_metadata=False)
     grid = XGrid(xgcm_grid, mesh="spherical")
 

From 7c212ff78b39134099cd80839b2f141f325ae671 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Fri, 10 Apr 2026 15:31:36 +0200
Subject: [PATCH 13/42] Remove nc code path

The `code_path="nc"` branch of `open_dataset` was used for testing and is no longer needed.
---
 src/parcels/tutorial.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index e5800d1851..7cdcc345a4 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -3,7 +3,6 @@
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Literal
 
 import pooch
 import xarray as xr
@@ -260,7 +259,7 @@ def download_example_dataset(dataset: str, data_home=None):
 _TMP_ZARR_FOLDER = Path("../parcels-data/data-zarr")
 
 
-def open_dataset(name: str, code_path: Literal["nc", "zarr"] = "nc"):  # TODO: Remove code_path arg
+def open_dataset(name: str):  # TODO: Remove code_path arg
     try:
         cfg = _DATASET_KEYS_AND_CONFIGS[name]
     except KeyError as e:
@@ -268,7 +267,6 @@ def open_dataset(name: str, code_path: Literal["nc", "zarr"] = "nc"):  # TODO: R
             f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_example_datasets(v4=True))
         ) from e
 
-    open_dataset_kwargs = dict(decode_timedelta=False, decode_cf=False)
     open_dataset_kwargs = dict(decode_cf=False)
     # assert not dataset.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix"
     download_dataset_stem, rest = cfg.path_relative_to_root.split("/", maxsplit=1)
@@ -283,14 +281,12 @@ def open_dataset(name: str, code_path: Literal["nc", "zarr"] = "nc"):  # TODO: R
 
     ds = xr.decode_cf(ds)
 
-    if code_path == "nc":
-        return ds
     path = _TMP_ZARR_FOLDER / f"{name}.zip"
-    path.parent.mkdir(exist_ok=True)
+    path.parent.mkdir(exist_ok=True, parents=True)
     if not path.exists():
         with zarr.storage.ZipStore(path, mode="w") as store:
             ds.to_zarr(store)
-    return xr.open_zarr(path, **open_dataset_kwargs)
+    return xr.open_zarr(path)
 
 
 def _v4_compat_patch(fname, action, pup):

From fdccf31e99b1916bdff323a79b6c2532525504bd Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Fri, 10 Apr 2026 15:36:34 +0200
Subject: [PATCH 14/42] Cleanup

---
 src/parcels/tutorial.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index 7cdcc345a4..e78d4b8629 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -268,12 +268,11 @@ def open_dataset(name: str):  # TODO: Remove code_path arg
         ) from e
 
     open_dataset_kwargs = dict(decode_cf=False)
-    # assert not dataset.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix"
+    assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix"
     download_dataset_stem, rest = cfg.path_relative_to_root.split("/", maxsplit=1)
     folder = download_example_dataset(download_dataset_stem)
 
     with xr.set_options(use_new_combine_kwarg_defaults=True):
-        # return f"{folder}/{rest}"
         ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs)
 
     if cfg.pre_decode_cf_callable is not None:

From 788313d22c5c10467e9ac807f2acd9bd2fdf42ee Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 09:40:26 +0200
Subject: [PATCH 15/42] Lift constant definition

---
 src/parcels/tutorial.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index e78d4b8629..b4c297628a 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -18,6 +18,11 @@
 
 _DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{_DATA_REPO_TAG}/data"
 
+DATA_HOME = os.environ.get("PARCELS_EXAMPLE_DATA")
+if DATA_HOME is None:
+    DATA_HOME = pooch.os_cache("parcels")
+
+
 # Keys are the dataset names. Values are the filenames in the dataset folder. Note that
 # you can specify subfolders in the dataset folder putting slashes in the filename list.
 # e.g.,
@@ -191,10 +196,7 @@ def _create_pooch_registry() -> dict[str, None]:
 
 def _get_pooch(data_home=None):
     if data_home is None:
-        data_home = os.environ.get("PARCELS_EXAMPLE_DATA")
-    if data_home is None:
-        data_home = pooch.os_cache("parcels")
-
+        data_home = DATA_HOME
     return pooch.create(
         path=data_home,
         base_url=_DATA_URL,

From 110f5d656dc017059ead924b5cdef99c15e4db77 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 09:46:10 +0200
Subject: [PATCH 16/42] Remove data_home option from download_dataset

---
 src/parcels/tutorial.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index b4c297628a..d362c21bb3 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -194,11 +194,9 @@ def _create_pooch_registry() -> dict[str, None]:
 POOCH_REGISTRY = _create_pooch_registry()
 
 
-def _get_pooch(data_home=None):
-    if data_home is None:
-        data_home = DATA_HOME
+def _get_pooch():
     return pooch.create(
-        path=data_home,
+        path=DATA_HOME,
         base_url=_DATA_URL,
         registry=POOCH_REGISTRY,
     )
@@ -219,20 +217,18 @@ def list_example_datasets(v4=False) -> list[str]:  # TODO: Remove v4 flag when m
     return list(set(v.path_relative_to_root.split("/")[0] for v in _DATASET_KEYS_AND_CONFIGS.values()))
 
 
-def download_example_dataset(dataset: str, data_home=None):
+def download_example_dataset(dataset: str):
     """Load an example dataset from the parcels website.
 
     This function provides quick access to a small number of example datasets
     that are useful in documentation and testing in parcels.
 
+    The location where the data is downloaded can be set using the environment variable PARCELS_EXAMPLE_DATA .
+
     Parameters
     ----------
     dataset : str
         Name of the dataset to load.
-    data_home : pathlike, optional
-        The directory in which to cache data. If not specified, the value
-        of the ``PARCELS_EXAMPLE_DATA`` environment variable, if any, is used.
-        Otherwise the default location is assigned by :func:`get_data_home`.
 
     Returns
     -------
@@ -244,7 +240,7 @@ def download_example_dataset(dataset: str, data_home=None):
         raise ValueError(
             f"Dataset {dataset!r} not found. Available datasets are: " + ", ".join(_EXAMPLE_DATA_FILES.keys())
         )
-    odie = _get_pooch(data_home=data_home)
+    odie = _get_pooch()
 
     cache_folder = Path(odie.path)
     dataset_folder = cache_folder / dataset

From da3975d8ff3b84b7cacea4295ac52a1eade15612 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 09:48:58 +0200
Subject: [PATCH 17/42] Make pooch instance a constant

---
 src/parcels/tutorial.py | 28 ++++++++++++----------------
 tests/test_tutorial.py  |  4 ++--
 2 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index d362c21bb3..ebe33da28c 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -18,9 +18,9 @@
 
 _DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{_DATA_REPO_TAG}/data"
 
-DATA_HOME = os.environ.get("PARCELS_EXAMPLE_DATA")
-if DATA_HOME is None:
-    DATA_HOME = pooch.os_cache("parcels")
+_DATA_HOME = os.environ.get("PARCELS_EXAMPLE_DATA")
+if _DATA_HOME is None:
+    _DATA_HOME = pooch.os_cache("parcels")
 
 
 # Keys are the dataset names. Values are the filenames in the dataset folder. Note that
@@ -191,15 +191,12 @@ def _create_pooch_registry() -> dict[str, None]:
     return registry
 
 
-POOCH_REGISTRY = _create_pooch_registry()
-
-
-def _get_pooch():
-    return pooch.create(
-        path=DATA_HOME,
-        base_url=_DATA_URL,
-        registry=POOCH_REGISTRY,
-    )
+_POOCH_REGISTRY = _create_pooch_registry()
+_ODIE = pooch.create(
+    path=_DATA_HOME,
+    base_url=_DATA_URL,
+    registry=_POOCH_REGISTRY,
+)
 
 
 def list_example_datasets(v4=False) -> list[str]:  # TODO: Remove v4 flag when migrating to open_dataset
@@ -240,15 +237,14 @@ def download_example_dataset(dataset: str):
         raise ValueError(
             f"Dataset {dataset!r} not found. Available datasets are: " + ", ".join(_EXAMPLE_DATA_FILES.keys())
         )
-    odie = _get_pooch()
 
-    cache_folder = Path(odie.path)
+    cache_folder = Path(_ODIE.path)
     dataset_folder = cache_folder / dataset
 
-    for file_name in odie.registry:
+    for file_name in _ODIE.registry:
         if file_name.startswith(dataset):
             should_patch = dataset == "GlobCurrent_example_data"
-            odie.fetch(file_name, processor=_v4_compat_patch if should_patch else None)
+            _ODIE.fetch(file_name, processor=_v4_compat_patch if should_patch else None)
 
     return dataset_folder
 
diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py
index 4ab1636c2b..9ae0b88aa7 100644
--- a/tests/test_tutorial.py
+++ b/tests/test_tutorial.py
@@ -3,14 +3,14 @@
 import xarray as xr
 
 from parcels.tutorial import (
-    _get_pooch,
+    _ODIE,
     download_example_dataset,
     list_example_datasets,
     open_dataset,
 )
 
 
-@pytest.mark.parametrize("url", [_get_pooch().get_url(filename) for filename in _get_pooch().registry.keys()])
+@pytest.mark.parametrize("url", [_ODIE.get_url(filename) for filename in _ODIE.registry.keys()])
 def test_pooch_registry_url_reponse(url):
     response = requests.head(url)
     assert not (400 <= response.status_code < 600)

From ee23d52e44951c0fd1349bb378be2bde461fb030 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 09:59:26 +0200
Subject: [PATCH 18/42] Fix test failures

---
 tests/test_tutorial.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py
index 9ae0b88aa7..a958949514 100644
--- a/tests/test_tutorial.py
+++ b/tests/test_tutorial.py
@@ -10,6 +10,12 @@
 )
 
 
+@pytest.fixture(scope="function")
+def tmp_path_parcels_example_data(monkeypatch, tmp_path):
+    monkeypatch.setenv("PARCELS_EXAMPLE_DATA", str(tmp_path))
+    return tmp_path
+
+
 @pytest.mark.parametrize("url", [_ODIE.get_url(filename) for filename in _ODIE.registry.keys()])
 def test_pooch_registry_url_reponse(url):
     response = requests.head(url)
@@ -17,17 +23,16 @@ def test_pooch_registry_url_reponse(url):
 
 
 @pytest.mark.parametrize("dataset", list_example_datasets()[:1])
-def test_download_example_dataset_folder_creation(tmp_path, dataset):
-    dataset_folder_path = download_example_dataset(dataset, data_home=tmp_path)
+def test_download_example_dataset_folder_creation(dataset):
+    dataset_folder_path = download_example_dataset(dataset)
 
     assert dataset_folder_path.exists()
     assert dataset_folder_path.name == dataset
-    assert dataset_folder_path.parent == tmp_path
 
 
-def test_download_non_existing_example_dataset(tmp_path):
+def test_download_non_existing_example_dataset(tmp_path_parcels_example_data):
     with pytest.raises(ValueError):
-        download_example_dataset("non_existing_dataset", data_home=tmp_path)
+        download_example_dataset("non_existing_dataset")
 
 
 def test_download_example_dataset_no_data_home():

From 7c67e89a11db948dc6665d8beb0a03aa0e67bb79 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 11:23:17 +0200
Subject: [PATCH 19/42] Refactor

---
 src/parcels/tutorial.py | 180 ++++++++++++++++++++++------------------
 1 file changed, 97 insertions(+), 83 deletions(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index ebe33da28c..d092d9bd0e 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -1,12 +1,11 @@
+import abc
 import os
 from collections.abc import Callable
-from dataclasses import dataclass
 from datetime import datetime, timedelta
 from pathlib import Path
 
 import pooch
 import xarray as xr
-import zarr
 
 from parcels._v3to4 import patch_dataset_v4_compat
 
@@ -115,12 +114,66 @@
 }
 
 
-@dataclass
-class DatasetNCtoZarrConfig:
-    path_relative_to_root: str
+def _create_pooch_registry() -> dict[str, None]:
+    """Collapses the mapping of dataset names to filenames into a pooch registry.
+
+    Hashes are set to None for all files.
+    """
+    registry: dict[str, None] = {}
+    for dataset, filenames in _EXAMPLE_DATA_FILES.items():
+        for filename in filenames:
+            registry[f"{dataset}/{filename}"] = None
+    return registry
+
+
+_POOCH_REGISTRY = _create_pooch_registry()
+_ODIE = pooch.create(
+    path=_DATA_HOME,
+    base_url=_DATA_URL,
+    registry=_POOCH_REGISTRY,
+)
+
+
+class _ParcelsDataset(abc.ABC):
+    @abc.abstractmethod
+    def open_dataset(self) -> xr.Dataset: ...
+
+
+class _V3Dataset(_ParcelsDataset):
+    def __init__(self, path_relative_to_root: str, pre_decode_cf_callable=None):
+        self.path_relative_to_root = path_relative_to_root  # glob is allowed
+
+        # Function to apply to the dataset before the decoding the CF variables
+        self.pup = _ODIE
+        self.pre_decode_cf_callable: None | Callable[[xr.Dataset], xr.Dataset] = pre_decode_cf_callable
+        self.v3_dataset_name = path_relative_to_root.split("/")[0]
+
+    def open_dataset(self) -> xr.Dataset:
+        self.download_relevant_files()
+        with xr.set_options(use_new_combine_kwarg_defaults=True):
+            ds = xr.open_mfdataset(Path(self.pup.path) / self.path_relative_to_root, decode_cf=False)
+
+        if self.pre_decode_cf_callable is not None:
+            ds = self.pre_decode_cf_callable(ds)
+
+        ds = xr.decode_cf(ds)
+        return ds
+
+    def download_relevant_files(self) -> None:
+        for file in self.pup.registry:
+            if self.v3_dataset_name in file:
+                self.pup.fetch(file)
+        return
+
 
-    # Function to apply to the dataset before the decoding the CF variables
-    pre_decode_cf_callable: None | Callable[[xr.Dataset], xr.Dataset] = None
+class _ZarrZipDataset(_ParcelsDataset):
+    def __init__(self, path_relative_to_root):
+        self.pup = _ODIE
+        self.path_relative_to_root = path_relative_to_root
+
+    def open_dataset(self) -> xr.Dataset:
+        self.pup.fetch(self.path_relative_to_root)
+        return xr.open_zarr(Path(self.pup.path) / self.path_relative_to_root)
 
 
 def _preprocess_drop_time_from_mesh1(ds: xr.Dataset) -> xr.Dataset:
@@ -143,62 +196,42 @@ def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset:
 # The first here is a human readable key, the latter the path to load the netcdf data
 # (after refactor the latter open path will disappear, and will just be `open_zarr(f'{ds_key}.zip')`)
 # fmt: off
-_DATASET_KEYS_AND_CONFIGS: dict[str, DatasetNCtoZarrConfig] = dict([
-    ("MovingEddies_data/P", DatasetNCtoZarrConfig("MovingEddies_data/moving_eddiesP.nc")),
-    ("MovingEddies_data/U", DatasetNCtoZarrConfig("MovingEddies_data/moving_eddiesU.nc")),
-    ("MovingEddies_data/V", DatasetNCtoZarrConfig("MovingEddies_data/moving_eddiesV.nc")),
-    ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", DatasetNCtoZarrConfig("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc")),
-    ("OFAM_example_data/U", DatasetNCtoZarrConfig("OFAM_example_data/OFAM_simple_U.nc")),
-    ("OFAM_example_data/V", DatasetNCtoZarrConfig("OFAM_example_data/OFAM_simple_V.nc")),
-    ("Peninsula_data/U", DatasetNCtoZarrConfig("Peninsula_data/peninsulaU.nc")),
-    ("Peninsula_data/V", DatasetNCtoZarrConfig("Peninsula_data/peninsulaV.nc")),
-    ("Peninsula_data/P", DatasetNCtoZarrConfig("Peninsula_data/peninsulaP.nc")),
-    ("Peninsula_data/T", DatasetNCtoZarrConfig("Peninsula_data/peninsulaT.nc")),
-    ("GlobCurrent_example_data/data.nc", DatasetNCtoZarrConfig("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc")),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", DatasetNCtoZarrConfig("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", DatasetNCtoZarrConfig("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", DatasetNCtoZarrConfig("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
-    ("DecayingMovingEddy_data/U", DatasetNCtoZarrConfig("DecayingMovingEddy_data/decaying_moving_eddyU.nc")),
-    ("DecayingMovingEddy_data/V", DatasetNCtoZarrConfig("DecayingMovingEddy_data/decaying_moving_eddyV.nc")),
-    ("FESOM_periodic_channel/fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/fesom_channel.nc")),
-    ("FESOM_periodic_channel/u.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/u.fesom_channel.nc")),
-    ("FESOM_periodic_channel/v.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/v.fesom_channel.nc")),
-    ("FESOM_periodic_channel/w.fesom_channel", DatasetNCtoZarrConfig("FESOM_periodic_channel/w.fesom_channel.nc")),
-    ("NemoCurvilinear_data_zonal/U", DatasetNCtoZarrConfig("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4")),
-    ("NemoCurvilinear_data_zonal/V", DatasetNCtoZarrConfig("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4")),
-    ("NemoCurvilinear_data_zonal/mesh_mask", DatasetNCtoZarrConfig("NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2)),
-    ("NemoNorthSeaORCA025-N006_data/U", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc")),
-    ("NemoNorthSeaORCA025-N006_data/V", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc")),
-    ("NemoNorthSeaORCA025-N006_data/W", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc")),
-    ("NemoNorthSeaORCA025-N006_data/mesh_mask", DatasetNCtoZarrConfig("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1)),
+_DATASET_KEYS_AND_CONFIGS: dict[str, _V3Dataset] = dict([
+    ("MovingEddies_data/P", _V3Dataset("MovingEddies_data/moving_eddiesP.nc")),
+    ("MovingEddies_data/U", _V3Dataset("MovingEddies_data/moving_eddiesU.nc")),
+    ("MovingEddies_data/V", _V3Dataset("MovingEddies_data/moving_eddiesV.nc")),
+    ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", _V3Dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc")),
+    ("OFAM_example_data/U", _V3Dataset("OFAM_example_data/OFAM_simple_U.nc")),
+    ("OFAM_example_data/V", _V3Dataset("OFAM_example_data/OFAM_simple_V.nc")),
+    ("Peninsula_data/U", _V3Dataset("Peninsula_data/peninsulaU.nc")),
+    ("Peninsula_data/V", _V3Dataset("Peninsula_data/peninsulaV.nc")),
+    ("Peninsula_data/P", _V3Dataset("Peninsula_data/peninsulaP.nc")),
+    ("Peninsula_data/T", _V3Dataset("Peninsula_data/peninsulaT.nc")),
+    ("GlobCurrent_example_data/data", _V3Dataset("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc")),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
+    ("DecayingMovingEddy_data/U", _V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyU.nc")),
+    ("DecayingMovingEddy_data/V", _V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyV.nc")),
+    ("FESOM_periodic_channel/fesom_channel", _V3Dataset("FESOM_periodic_channel/fesom_channel.nc")),
+    ("FESOM_periodic_channel/u.fesom_channel", _V3Dataset("FESOM_periodic_channel/u.fesom_channel.nc")),
+    ("FESOM_periodic_channel/v.fesom_channel", _V3Dataset("FESOM_periodic_channel/v.fesom_channel.nc")),
+    ("FESOM_periodic_channel/w.fesom_channel", _V3Dataset("FESOM_periodic_channel/w.fesom_channel.nc")),
+    ("NemoCurvilinear_data_zonal/U", _V3Dataset("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4")),
+    ("NemoCurvilinear_data_zonal/V", _V3Dataset("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4")),
+    ("NemoCurvilinear_data_zonal/mesh_mask", _V3Dataset("NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2)),
+    ("NemoNorthSeaORCA025-N006_data/U", _V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc")),
+    ("NemoNorthSeaORCA025-N006_data/V", _V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc")),
+    ("NemoNorthSeaORCA025-N006_data/W", _V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc")),
+    ("NemoNorthSeaORCA025-N006_data/mesh_mask", _V3Dataset("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1)),
     # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973
-    ("SWASH_data/data", DatasetNCtoZarrConfig("SWASH_data/field_00655*.nc")),
-    ("WOA_data/data", DatasetNCtoZarrConfig("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day)),
-    ("CROCOidealized_data/data", DatasetNCtoZarrConfig("CROCOidealized_data/CROCO_idealized.nc")),
+    ("SWASH_data/data", _V3Dataset("SWASH_data/field_00655*.nc")),
+    ("WOA_data/data", _V3Dataset("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day)),
+    ("CROCOidealized_data/data", _V3Dataset("CROCOidealized_data/CROCO_idealized.nc")),
 ])
 # fmt: on
 
 
-def _create_pooch_registry() -> dict[str, None]:
-    """Collapses the mapping of dataset names to filenames into a pooch registry.
-
-    Hashes are set to None for all files.
-    """
-    registry: dict[str, None] = {}
-    for dataset, filenames in _EXAMPLE_DATA_FILES.items():
-        for filename in filenames:
-            registry[f"{dataset}/{filename}"] = None
-    return registry
-
-
-_POOCH_REGISTRY = _create_pooch_registry()
-_ODIE = pooch.create(
-    path=_DATA_HOME,
-    base_url=_DATA_URL,
-    registry=_POOCH_REGISTRY,
-)
-
-
 def list_example_datasets(v4=False) -> list[str]:  # TODO: Remove v4 flag when migrating to open_dataset
     """List the available example datasets.
 
@@ -249,37 +282,18 @@ def download_example_dataset(dataset: str):
     return dataset_folder
 
 
-# Just creating a temp folder to help during the migration
-_TMP_ZARR_FOLDER = Path("../parcels-data/data-zarr")
-
-
-def open_dataset(name: str):  # TODO: Remove code_path arg
+def open_dataset(name: str):
     try:
-        cfg = _DATASET_KEYS_AND_CONFIGS[name]
+        dataset_config = _DATASET_KEYS_AND_CONFIGS[name]
     except KeyError as e:
         raise ValueError(
             f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_example_datasets(v4=True))
         ) from e
+    assert not name.endswith((".zarr", ".zip", ".nc")), (
+        "Dataset name should not have suffix"
+    )  # TODO: Move to test_tutorial
 
-    open_dataset_kwargs = dict(decode_cf=False)
-    assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix"
-    download_dataset_stem, rest = cfg.path_relative_to_root.split("/", maxsplit=1)
-    folder = download_example_dataset(download_dataset_stem)
-
-    with xr.set_options(use_new_combine_kwarg_defaults=True):
-        ds = xr.open_mfdataset(f"{folder}/{rest}", **open_dataset_kwargs)
-
-    if cfg.pre_decode_cf_callable is not None:
-        ds = cfg.pre_decode_cf_callable(ds)
-
-    ds = xr.decode_cf(ds)
-
-    path = _TMP_ZARR_FOLDER / f"{name}.zip"
-    path.parent.mkdir(exist_ok=True, parents=True)
-    if not path.exists():
-        with zarr.storage.ZipStore(path, mode="w") as store:
-            ds.to_zarr(store)
-    return xr.open_zarr(path)
+    return dataset_config.open_dataset()
 
 
 def _v4_compat_patch(fname, action, pup):

From 89d48c97c08d481e1f185924443b9908c6af1530 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 11:50:30 +0200
Subject: [PATCH 20/42] Refactor and remove legacy tooling

---
 src/parcels/tutorial.py | 237 +++++++++++++++-------------------------
 tests/test_tutorial.py  |  40 ++-----
 2 files changed, 97 insertions(+), 180 deletions(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index d092d9bd0e..0a127f5e80 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -9,7 +9,7 @@
 
 from parcels._v3to4 import patch_dataset_v4_compat
 
-__all__ = ["download_example_dataset", "list_example_datasets"]
+__all__ = ["list_datasets", "open_dataset"]
 
 # When modifying existing datasets in a backwards incompatible way,
 # make a new release in the repo and update the DATA_REPO_TAG to the new tag
@@ -34,99 +34,88 @@
 #     └── file2.nc
 #
 # See instructions at https://github.com/Parcels-code/parcels-data for adding new datasets
-_EXAMPLE_DATA_FILES: dict[str, list[str]] = {
-    "MovingEddies_data": [
-        "moving_eddiesP.nc",
-        "moving_eddiesU.nc",
-        "moving_eddiesV.nc",
-    ],
-    "MITgcm_example_data": ["mitgcm_UV_surface_zonally_reentrant.nc"],
-    "OFAM_example_data": ["OFAM_simple_U.nc", "OFAM_simple_V.nc"],
-    "Peninsula_data": [
-        "peninsulaU.nc",
-        "peninsulaV.nc",
-        "peninsulaP.nc",
-        "peninsulaT.nc",
-    ],
-    "GlobCurrent_example_data": [
-        f"{date.strftime('%Y%m%d')}000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc"
+_POOCH_REGISTRY_FILES: list[str] = (
+    [
+        "MovingEddies_data/moving_eddiesP.nc",
+        "MovingEddies_data/moving_eddiesU.nc",
+        "MovingEddies_data/moving_eddiesV.nc",
+    ]
+    + ["MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"]
+    + ["OFAM_example_data/OFAM_simple_U.nc", "OFAM_example_data/OFAM_simple_V.nc"]
+    + [
+        "Peninsula_data/peninsulaU.nc",
+        "Peninsula_data/peninsulaV.nc",
+        "Peninsula_data/peninsulaP.nc",
+        "Peninsula_data/peninsulaT.nc",
+    ]
+    + [
+        f"GlobCurrent_example_data/{date.strftime('%Y%m%d')}000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc"
         for date in ([datetime(2002, 1, 1) + timedelta(days=x) for x in range(0, 365)] + [datetime(2003, 1, 1)])
-    ],
-    "CopernicusMarine_data_for_Argo_tutorial": [
-        "cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
-        "cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
-        "cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
-    ],
-    "DecayingMovingEddy_data": [
-        "decaying_moving_eddyU.nc",
-        "decaying_moving_eddyV.nc",
-    ],
-    "FESOM_periodic_channel": [
-        "fesom_channel.nc",
-        "u.fesom_channel.nc",
-        "v.fesom_channel.nc",
-        "w.fesom_channel.nc",
-    ],
-    "NemoCurvilinear_data": [
-        "U_purely_zonal-ORCA025_grid_U.nc4",
-        "V_purely_zonal-ORCA025_grid_V.nc4",
-        "mesh_mask.nc4",
-    ],
-    "NemoNorthSeaORCA025-N006_data": [
-        "ORCA025-N06_20000104d05U.nc",
-        "ORCA025-N06_20000109d05U.nc",
-        "ORCA025-N06_20000114d05U.nc",
-        "ORCA025-N06_20000119d05U.nc",
-        "ORCA025-N06_20000124d05U.nc",
-        "ORCA025-N06_20000129d05U.nc",
-        "ORCA025-N06_20000104d05V.nc",
-        "ORCA025-N06_20000109d05V.nc",
-        "ORCA025-N06_20000114d05V.nc",
-        "ORCA025-N06_20000119d05V.nc",
-        "ORCA025-N06_20000124d05V.nc",
-        "ORCA025-N06_20000129d05V.nc",
-        "ORCA025-N06_20000104d05W.nc",
-        "ORCA025-N06_20000109d05W.nc",
-        "ORCA025-N06_20000114d05W.nc",
-        "ORCA025-N06_20000119d05W.nc",
-        "ORCA025-N06_20000124d05W.nc",
-        "ORCA025-N06_20000129d05W.nc",
-        "coordinates.nc",
-    ],
-    "POPSouthernOcean_data": [
-        "t.x1_SAMOC_flux.169000.nc",
-        "t.x1_SAMOC_flux.169001.nc",
-        "t.x1_SAMOC_flux.169002.nc",
-        "t.x1_SAMOC_flux.169003.nc",
-        "t.x1_SAMOC_flux.169004.nc",
-        "t.x1_SAMOC_flux.169005.nc",
-    ],
-    "SWASH_data": [
-        "field_0065532.nc",
-        "field_0065537.nc",
-        "field_0065542.nc",
-        "field_0065548.nc",
-        "field_0065552.nc",
-        "field_0065557.nc",
-    ],
-    "WOA_data": [f"woa18_decav_t{m:02d}_04.nc" for m in range(1, 13)],
-    "CROCOidealized_data": ["CROCO_idealized.nc"],
-}
-
-
-def _create_pooch_registry() -> dict[str, None]:
-    """Collapses the mapping of dataset names to filenames into a pooch registry.
-
-    Hashes are set to None for all files.
-    """
-    registry: dict[str, None] = {}
-    for dataset, filenames in _EXAMPLE_DATA_FILES.items():
-        for filename in filenames:
-            registry[f"{dataset}/{filename}"] = None
-    return registry
+    ]
+    + [
+        "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
+        "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
+        "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
+    ]
+    + [
+        "DecayingMovingEddy_data/decaying_moving_eddyU.nc",
+        "DecayingMovingEddy_data/decaying_moving_eddyV.nc",
+    ]
+    + [
+        "FESOM_periodic_channel/fesom_channel.nc",
+        "FESOM_periodic_channel/u.fesom_channel.nc",
+        "FESOM_periodic_channel/v.fesom_channel.nc",
+        "FESOM_periodic_channel/w.fesom_channel.nc",
+    ]
+    + [
+        "NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4",
+        "NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4",
+        "NemoCurvilinear_data/mesh_mask.nc4",
+    ]
+    + [
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05U.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05U.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05U.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05U.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05U.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05U.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05V.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05V.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05V.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05V.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05V.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05V.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05W.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05W.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05W.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05W.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05W.nc",
+        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05W.nc",
+        "NemoNorthSeaORCA025-N006_data/coordinates.nc",
+    ]
+    + [
+        "POPSouthernOcean_data/t.x1_SAMOC_flux.169000.nc",
+        "POPSouthernOcean_data/t.x1_SAMOC_flux.169001.nc",
+        "POPSouthernOcean_data/t.x1_SAMOC_flux.169002.nc",
+        "POPSouthernOcean_data/t.x1_SAMOC_flux.169003.nc",
+        "POPSouthernOcean_data/t.x1_SAMOC_flux.169004.nc",
+        "POPSouthernOcean_data/t.x1_SAMOC_flux.169005.nc",
+    ]
+    + [
+        "SWASH_data/field_0065532.nc",
+        "SWASH_data/field_0065537.nc",
+        "SWASH_data/field_0065542.nc",
+        "SWASH_data/field_0065548.nc",
+        "SWASH_data/field_0065552.nc",
+        "SWASH_data/field_0065557.nc",
+    ]
+    + [f"WOA_data/woa18_decav_t{m:02d}_04.nc" for m in range(1, 13)]
+    + ["CROCOidealized_data/CROCO_idealized.nc"]
+)
+
+_POOCH_REGISTRY = {k: None for k in _POOCH_REGISTRY_FILES}
 
 
-_POOCH_REGISTRY = _create_pooch_registry()
 _ODIE = pooch.create(
     path=_DATA_HOME,
     base_url=_DATA_URL,
@@ -151,7 +140,7 @@ def __init__(self, path_relative_to_root: str, pre_decode_cf_callable=None):
     def open_dataset(self) -> xr.Dataset:
         self.download_relevant_files()
         with xr.set_options(use_new_combine_kwarg_defaults=True):
-            ds = xr.open_mfdataset(Path(self.pup.path) / self.path_relative_to_root, decode_cf=False)
+            ds = xr.open_mfdataset(f"{self.pup.path}/{self.path_relative_to_root}", decode_cf=False)
 
         if self.pre_decode_cf_callable is not None:
             ds = self.pre_decode_cf_callable(ds)
@@ -193,8 +182,7 @@ def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset:
     return ds
 
 
-# The first here is a human readable key, the latter the path to load the netcdf data
-# (after refactor the latter open path will disappear, and will just be `open_zarr(f'{ds_key}.zip')`)
+# The first here is a human readable key used to open datasets, with an object to open the datasets
 # fmt: off
 _DATASET_KEYS_AND_CONFIGS: dict[str, _V3Dataset] = dict([
     ("MovingEddies_data/P", _V3Dataset("MovingEddies_data/moving_eddiesP.nc")),
@@ -207,7 +195,7 @@ def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset:
     ("Peninsula_data/V", _V3Dataset("Peninsula_data/peninsulaV.nc")),
     ("Peninsula_data/P", _V3Dataset("Peninsula_data/peninsulaP.nc")),
     ("Peninsula_data/T", _V3Dataset("Peninsula_data/peninsulaT.nc")),
-    ("GlobCurrent_example_data/data", _V3Dataset("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc")),
+    ("GlobCurrent_example_data/data", _V3Dataset("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat)),
     ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
     ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
     ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
@@ -232,77 +220,26 @@ def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset:
 # fmt: on
 
 
-def list_example_datasets(v4=False) -> list[str]:  # TODO: Remove v4 flag when migrating to open_dataset
+def list_datasets() -> list[str]:  # TODO: Remove v4 flag when migrating to open_dataset
     """List the available example datasets.
 
-    Use :func:`download_example_dataset` to download one of the datasets.
+    Use :func:`open_dataset` to download and open one of the datasets.
 
     Returns
     -------
     datasets : list of str
         The names of the available example datasets.
     """
-    if v4:
-        return list(_DATASET_KEYS_AND_CONFIGS.keys())
-    return list(set(v.path_relative_to_root.split("/")[0] for v in _DATASET_KEYS_AND_CONFIGS.values()))
-
-
-def download_example_dataset(dataset: str):
-    """Load an example dataset from the parcels website.
-
-    This function provides quick access to a small number of example datasets
-    that are useful in documentation and testing in parcels.
-
-    The location where the data is downloaded can be set using the environment variable PARCELS_EXAMPLE_DATA .
-
-    Parameters
-    ----------
-    dataset : str
-        Name of the dataset to load.
-
-    Returns
-    -------
-    dataset_folder : Path
-        Path to the folder containing the downloaded dataset files.
-    """
-    # Dev note: `dataset` is assumed to be a folder name with netcdf files
-    if dataset not in _EXAMPLE_DATA_FILES:
-        raise ValueError(
-            f"Dataset {dataset!r} not found. Available datasets are: " + ", ".join(_EXAMPLE_DATA_FILES.keys())
-        )
-
-    cache_folder = Path(_ODIE.path)
-    dataset_folder = cache_folder / dataset
-
-    for file_name in _ODIE.registry:
-        if file_name.startswith(dataset):
-            should_patch = dataset == "GlobCurrent_example_data"
-            _ODIE.fetch(file_name, processor=_v4_compat_patch if should_patch else None)
-
-    return dataset_folder
+    return list(_DATASET_KEYS_AND_CONFIGS.keys())
 
 
 def open_dataset(name: str):
     try:
         dataset_config = _DATASET_KEYS_AND_CONFIGS[name]
     except KeyError as e:
-        raise ValueError(
-            f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_example_datasets(v4=True))
-        ) from e
+        raise ValueError(f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_datasets())) from e
     assert not name.endswith((".zarr", ".zip", ".nc")), (
         "Dataset name should not have suffix"
     )  # TODO: Move to test_tutorial
 
     return dataset_config.open_dataset()
-
-
-def _v4_compat_patch(fname, action, pup):
-    """
-    Patch the GlobCurrent example dataset to be compatible with v4.
-
-    See https://www.fatiando.org/pooch/latest/processors.html#creating-your-own-processors
-    """
-    if action == "fetch":
-        return fname
-    xr.load_dataset(fname).pipe(patch_dataset_v4_compat).to_netcdf(fname)
-    return fname
diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py
index a958949514..847cbb3ea4 100644
--- a/tests/test_tutorial.py
+++ b/tests/test_tutorial.py
@@ -2,49 +2,29 @@
 import requests
 import xarray as xr
 
-from parcels.tutorial import (
-    _ODIE,
-    download_example_dataset,
-    list_example_datasets,
-    open_dataset,
-)
+import parcels.tutorial
 
 
-@pytest.fixture(scope="function")
+@pytest.fixture(scope="function", autouse=True)
 def tmp_path_parcels_example_data(monkeypatch, tmp_path):
     monkeypatch.setenv("PARCELS_EXAMPLE_DATA", str(tmp_path))
     return tmp_path
 
 
-@pytest.mark.parametrize("url", [_ODIE.get_url(filename) for filename in _ODIE.registry.keys()])
+@pytest.mark.parametrize(
+    "url", [parcels.tutorial._ODIE.get_url(filename) for filename in parcels.tutorial._ODIE.registry.keys()]
+)
 def test_pooch_registry_url_reponse(url):
     response = requests.head(url)
     assert not (400 <= response.status_code < 600)
 
 
-@pytest.mark.parametrize("dataset", list_example_datasets()[:1])
-def test_download_example_dataset_folder_creation(dataset):
-    dataset_folder_path = download_example_dataset(dataset)
-
-    assert dataset_folder_path.exists()
-    assert dataset_folder_path.name == dataset
-
-
-def test_download_non_existing_example_dataset(tmp_path_parcels_example_data):
-    with pytest.raises(ValueError):
-        download_example_dataset("non_existing_dataset")
-
-
-def test_download_example_dataset_no_data_home():
-    # This test depends on your default data_home location and whether
-    # it's okay to download files there. Be careful with this test in a CI environment.
-    dataset = list_example_datasets()[0]
-    dataset_folder_path = download_example_dataset(dataset)
-    assert dataset_folder_path.exists()
-    assert dataset_folder_path.name == dataset
+def test_open_dataset_non_existing():
+    with pytest.raises(ValueError, match="Dataset.*not found"):
+        parcels.tutorial.open_dataset("non_existing_dataset")
 
 
-@pytest.mark.parametrize("name", list_example_datasets(v4=True))
+@pytest.mark.parametrize("name", parcels.tutorial.list_datasets())
 def test_open_dataset(name):
-    ds = open_dataset(name)
+    ds = parcels.tutorial.open_dataset(name)
     assert isinstance(ds, xr.Dataset)

From b36cc8e4bfb18dc72c7bd4985585463991fef043 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 11:56:00 +0200
Subject: [PATCH 21/42] Update pooch registry

---
 src/parcels/tutorial.py | 127 +++++++++++++++++++---------------------
 1 file changed, 59 insertions(+), 68 deletions(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index 0a127f5e80..3e5c0be0b8 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -15,102 +15,93 @@
 # make a new release in the repo and update the DATA_REPO_TAG to the new tag
 _DATA_REPO_TAG = "main"
 
-_DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{_DATA_REPO_TAG}/data"
+_DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{_DATA_REPO_TAG}"
 
 _DATA_HOME = os.environ.get("PARCELS_EXAMPLE_DATA")
 if _DATA_HOME is None:
     _DATA_HOME = pooch.os_cache("parcels")
 
-
-# Keys are the dataset names. Values are the filenames in the dataset folder. Note that
-# you can specify subfolders in the dataset folder putting slashes in the filename list.
-# e.g.,
-# "my_dataset": ["file0.nc", "folder1/file1.nc", "folder2/file2.nc"]
-# my_dataset/
-# ├── file0.nc
-# ├── folder1/
-# │   └── file1.nc
-# └── folder2/
-#     └── file2.nc
-#
 # See instructions at https://github.com/Parcels-code/parcels-data for adding new datasets
 _POOCH_REGISTRY_FILES: list[str] = (
+    # These datasets are from v3 and before of Parcels, where we just used netcdf files
     [
-        "MovingEddies_data/moving_eddiesP.nc",
-        "MovingEddies_data/moving_eddiesU.nc",
-        "MovingEddies_data/moving_eddiesV.nc",
+        "data/MovingEddies_data/moving_eddiesP.nc",
+        "data/MovingEddies_data/moving_eddiesU.nc",
+        "data/MovingEddies_data/moving_eddiesV.nc",
     ]
-    + ["MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"]
-    + ["OFAM_example_data/OFAM_simple_U.nc", "OFAM_example_data/OFAM_simple_V.nc"]
+    + ["data/MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"]
+    + ["data/OFAM_example_data/OFAM_simple_U.nc", "data/OFAM_example_data/OFAM_simple_V.nc"]
     + [
-        "Peninsula_data/peninsulaU.nc",
-        "Peninsula_data/peninsulaV.nc",
-        "Peninsula_data/peninsulaP.nc",
-        "Peninsula_data/peninsulaT.nc",
+        "data/Peninsula_data/peninsulaU.nc",
+        "data/Peninsula_data/peninsulaV.nc",
+        "data/Peninsula_data/peninsulaP.nc",
+        "data/Peninsula_data/peninsulaT.nc",
     ]
     + [
-        f"GlobCurrent_example_data/{date.strftime('%Y%m%d')}000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc"
+        f"data/GlobCurrent_example_data/{date.strftime('%Y%m%d')}000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc"
         for date in ([datetime(2002, 1, 1) + timedelta(days=x) for x in range(0, 365)] + [datetime(2003, 1, 1)])
     ]
     + [
-        "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
-        "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
-        "CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
+        "data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
+        "data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
+        "data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc",
     ]
     + [
-        "DecayingMovingEddy_data/decaying_moving_eddyU.nc",
-        "DecayingMovingEddy_data/decaying_moving_eddyV.nc",
+        "data/DecayingMovingEddy_data/decaying_moving_eddyU.nc",
+        "data/DecayingMovingEddy_data/decaying_moving_eddyV.nc",
     ]
     + [
-        "FESOM_periodic_channel/fesom_channel.nc",
-        "FESOM_periodic_channel/u.fesom_channel.nc",
-        "FESOM_periodic_channel/v.fesom_channel.nc",
-        "FESOM_periodic_channel/w.fesom_channel.nc",
+        "data/FESOM_periodic_channel/fesom_channel.nc",
+        "data/FESOM_periodic_channel/u.fesom_channel.nc",
+        "data/FESOM_periodic_channel/v.fesom_channel.nc",
+        "data/FESOM_periodic_channel/w.fesom_channel.nc",
     ]
     + [
-        "NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4",
-        "NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4",
-        "NemoCurvilinear_data/mesh_mask.nc4",
+        "data/NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4",
+        "data/NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4",
+        "data/NemoCurvilinear_data/mesh_mask.nc4",
     ]
     + [
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05U.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05U.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05U.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05U.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05U.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05U.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05V.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05V.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05V.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05V.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05V.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05V.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05W.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05W.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05W.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05W.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05W.nc",
-        "NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05W.nc",
-        "NemoNorthSeaORCA025-N006_data/coordinates.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05U.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05U.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05U.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05U.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05U.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05U.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05V.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05V.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05V.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05V.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05V.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05V.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05W.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05W.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05W.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05W.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05W.nc",
+        "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05W.nc",
+        "data/NemoNorthSeaORCA025-N006_data/coordinates.nc",
     ]
     + [
-        "POPSouthernOcean_data/t.x1_SAMOC_flux.169000.nc",
-        "POPSouthernOcean_data/t.x1_SAMOC_flux.169001.nc",
-        "POPSouthernOcean_data/t.x1_SAMOC_flux.169002.nc",
-        "POPSouthernOcean_data/t.x1_SAMOC_flux.169003.nc",
-        "POPSouthernOcean_data/t.x1_SAMOC_flux.169004.nc",
-        "POPSouthernOcean_data/t.x1_SAMOC_flux.169005.nc",
+        "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169000.nc",
+        "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169001.nc",
+        "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169002.nc",
+        "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169003.nc",
+        "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169004.nc",
+        "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169005.nc",
     ]
     + [
-        "SWASH_data/field_0065532.nc",
-        "SWASH_data/field_0065537.nc",
-        "SWASH_data/field_0065542.nc",
-        "SWASH_data/field_0065548.nc",
-        "SWASH_data/field_0065552.nc",
-        "SWASH_data/field_0065557.nc",
+        "data/SWASH_data/field_0065532.nc",
+        "data/SWASH_data/field_0065537.nc",
+        "data/SWASH_data/field_0065542.nc",
+        "data/SWASH_data/field_0065548.nc",
+        "data/SWASH_data/field_0065552.nc",
+        "data/SWASH_data/field_0065557.nc",
     ]
-    + [f"WOA_data/woa18_decav_t{m:02d}_04.nc" for m in range(1, 13)]
-    + ["CROCOidealized_data/CROCO_idealized.nc"]
+    + [f"data/WOA_data/woa18_decav_t{m:02d}_04.nc" for m in range(1, 13)]
+    + ["data/CROCOidealized_data/CROCO_idealized.nc"]
+    # These datasets are from v4 of Parcels where we're opting for Zipped zarr datasets
+    # ...
 )
 
 _POOCH_REGISTRY = {k: None for k in _POOCH_REGISTRY_FILES}

From 0fd9dbd87ce77f44ed75fc926a5a313de213eab7 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 11:57:45 +0200
Subject: [PATCH 22/42] Move test

---
 src/parcels/tutorial.py | 3 ---
 tests/test_tutorial.py  | 5 +++++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index 3e5c0be0b8..d52534531d 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -229,8 +229,5 @@ def open_dataset(name: str):
         dataset_config = _DATASET_KEYS_AND_CONFIGS[name]
     except KeyError as e:
         raise ValueError(f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_datasets())) from e
-    assert not name.endswith((".zarr", ".zip", ".nc")), (
-        "Dataset name should not have suffix"
-    )  # TODO: Move to test_tutorial
 
     return dataset_config.open_dataset()
diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py
index 847cbb3ea4..d25ab0cad4 100644
--- a/tests/test_tutorial.py
+++ b/tests/test_tutorial.py
@@ -28,3 +28,8 @@ def test_open_dataset_non_existing():
 def test_open_dataset(name):
     ds = parcels.tutorial.open_dataset(name)
     assert isinstance(ds, xr.Dataset)
+
+
+@pytest.mark.parametrize("name", parcels.tutorial.list_datasets())
+def test_dataset_keys(name):
+    assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix"

From d408232103dae5e93796b748f3768d30f5414a90 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 12:02:53 +0200
Subject: [PATCH 23/42] Add purpose to datasets

---
 src/parcels/tutorial.py | 69 ++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 32 deletions(-)

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index d52534531d..cca64b28ed 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -1,4 +1,5 @@
 import abc
+import enum
 import os
 from collections.abc import Callable
 from datetime import datetime, timedelta
@@ -175,38 +176,42 @@ def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset:
 
 # The first here is a human readable key used to open datasets, with an object to open the datasets
 # fmt: off
-_DATASET_KEYS_AND_CONFIGS: dict[str, _V3Dataset] = dict([
-    ("MovingEddies_data/P", _V3Dataset("MovingEddies_data/moving_eddiesP.nc")),
-    ("MovingEddies_data/U", _V3Dataset("MovingEddies_data/moving_eddiesU.nc")),
-    ("MovingEddies_data/V", _V3Dataset("MovingEddies_data/moving_eddiesV.nc")),
-    ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", _V3Dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc")),
-    ("OFAM_example_data/U", _V3Dataset("OFAM_example_data/OFAM_simple_U.nc")),
-    ("OFAM_example_data/V", _V3Dataset("OFAM_example_data/OFAM_simple_V.nc")),
-    ("Peninsula_data/U", _V3Dataset("Peninsula_data/peninsulaU.nc")),
-    ("Peninsula_data/V", _V3Dataset("Peninsula_data/peninsulaV.nc")),
-    ("Peninsula_data/P", _V3Dataset("Peninsula_data/peninsulaP.nc")),
-    ("Peninsula_data/T", _V3Dataset("Peninsula_data/peninsulaT.nc")),
-    ("GlobCurrent_example_data/data", _V3Dataset("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat)),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", _V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc")),
-    ("DecayingMovingEddy_data/U", _V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyU.nc")),
-    ("DecayingMovingEddy_data/V", _V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyV.nc")),
-    ("FESOM_periodic_channel/fesom_channel", _V3Dataset("FESOM_periodic_channel/fesom_channel.nc")),
-    ("FESOM_periodic_channel/u.fesom_channel", _V3Dataset("FESOM_periodic_channel/u.fesom_channel.nc")),
-    ("FESOM_periodic_channel/v.fesom_channel", _V3Dataset("FESOM_periodic_channel/v.fesom_channel.nc")),
-    ("FESOM_periodic_channel/w.fesom_channel", _V3Dataset("FESOM_periodic_channel/w.fesom_channel.nc")),
-    ("NemoCurvilinear_data_zonal/U", _V3Dataset("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4")),
-    ("NemoCurvilinear_data_zonal/V", _V3Dataset("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4")),
-    ("NemoCurvilinear_data_zonal/mesh_mask", _V3Dataset("NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2)),
-    ("NemoNorthSeaORCA025-N006_data/U", _V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc")),
-    ("NemoNorthSeaORCA025-N006_data/V", _V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc")),
-    ("NemoNorthSeaORCA025-N006_data/W", _V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc")),
-    ("NemoNorthSeaORCA025-N006_data/mesh_mask", _V3Dataset("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1)),
+class _Purpose(enum.Enum):
+    TESTING = enum.auto()
+    TUTORIAL = enum.auto()
+
+_DATASET_KEYS_AND_CONFIGS: dict[str, tuple[_V3Dataset, _Purpose]] = dict([
+    ("MovingEddies_data/P", (_V3Dataset("MovingEddies_data/moving_eddiesP.nc"), _Purpose.TUTORIAL)),
+    ("MovingEddies_data/U", (_V3Dataset("MovingEddies_data/moving_eddiesU.nc"), _Purpose.TUTORIAL)),
+    ("MovingEddies_data/V", (_V3Dataset("MovingEddies_data/moving_eddiesV.nc"), _Purpose.TUTORIAL)),
+    ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", (_V3Dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"), _Purpose.TUTORIAL)),
+    ("OFAM_example_data/U", (_V3Dataset("OFAM_example_data/OFAM_simple_U.nc"), _Purpose.TUTORIAL)),
+    ("OFAM_example_data/V", (_V3Dataset("OFAM_example_data/OFAM_simple_V.nc"), _Purpose.TUTORIAL)),
+    ("Peninsula_data/U", (_V3Dataset("Peninsula_data/peninsulaU.nc"), _Purpose.TUTORIAL)),
+    ("Peninsula_data/V", (_V3Dataset("Peninsula_data/peninsulaV.nc"), _Purpose.TUTORIAL)),
+    ("Peninsula_data/P", (_V3Dataset("Peninsula_data/peninsulaP.nc"), _Purpose.TUTORIAL)),
+    ("Peninsula_data/T", (_V3Dataset("Peninsula_data/peninsulaT.nc"), _Purpose.TUTORIAL)),
+    ("GlobCurrent_example_data/data", (_V3Dataset("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat), _Purpose.TUTORIAL)),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", (_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", (_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", (_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)),
+    ("DecayingMovingEddy_data/U", (_V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyU.nc"), _Purpose.TUTORIAL)),
+    ("DecayingMovingEddy_data/V", (_V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyV.nc"), _Purpose.TUTORIAL)),
+    ("FESOM_periodic_channel/fesom_channel", (_V3Dataset("FESOM_periodic_channel/fesom_channel.nc"), _Purpose.TUTORIAL)),
+    ("FESOM_periodic_channel/u.fesom_channel", (_V3Dataset("FESOM_periodic_channel/u.fesom_channel.nc"), _Purpose.TUTORIAL)),
+    ("FESOM_periodic_channel/v.fesom_channel", (_V3Dataset("FESOM_periodic_channel/v.fesom_channel.nc"), _Purpose.TUTORIAL)),
+    ("FESOM_periodic_channel/w.fesom_channel", (_V3Dataset("FESOM_periodic_channel/w.fesom_channel.nc"), _Purpose.TUTORIAL)),
+    ("NemoCurvilinear_data_zonal/U", (_V3Dataset("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4"), _Purpose.TUTORIAL)),
+    ("NemoCurvilinear_data_zonal/V", (_V3Dataset("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4"), _Purpose.TUTORIAL)),
+    ("NemoCurvilinear_data_zonal/mesh_mask", (_V3Dataset("NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2), _Purpose.TUTORIAL)),
+    ("NemoNorthSeaORCA025-N006_data/U", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc"), _Purpose.TUTORIAL)),
+    ("NemoNorthSeaORCA025-N006_data/V", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"), _Purpose.TUTORIAL)),
+    ("NemoNorthSeaORCA025-N006_data/W", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"), _Purpose.TUTORIAL)),
+    ("NemoNorthSeaORCA025-N006_data/mesh_mask", (_V3Dataset("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1), _Purpose.TUTORIAL)),
     # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973
-    ("SWASH_data/data", _V3Dataset("SWASH_data/field_00655*.nc")),
-    ("WOA_data/data", _V3Dataset("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day)),
-    ("CROCOidealized_data/data", _V3Dataset("CROCOidealized_data/CROCO_idealized.nc")),
+    ("SWASH_data/data", (_V3Dataset("SWASH_data/field_00655*.nc"), _Purpose.TUTORIAL)),
+    ("WOA_data/data", (_V3Dataset("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day), _Purpose.TUTORIAL)),
+    ("CROCOidealized_data/data", (_V3Dataset("CROCOidealized_data/CROCO_idealized.nc"), _Purpose.TUTORIAL)),
 ])
 # fmt: on
 
@@ -226,7 +231,7 @@ def list_datasets() -> list[str]:  # TODO: Remove v4 flag when migrating to open
 
 def open_dataset(name: str):
     try:
-        dataset_config = _DATASET_KEYS_AND_CONFIGS[name]
+        dataset_config = _DATASET_KEYS_AND_CONFIGS[name][0]
     except KeyError as e:
         raise ValueError(f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_datasets())) from e
 

From adff0aa7e64252081d47c4679d1f459a39aa95fe Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 12:03:56 +0200
Subject: [PATCH 24/42] Move file

---
 src/parcels/{tutorial.py => _datasets/remote.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/parcels/{tutorial.py => _datasets/remote.py} (100%)

diff --git a/src/parcels/tutorial.py b/src/parcels/_datasets/remote.py
similarity index 100%
rename from src/parcels/tutorial.py
rename to src/parcels/_datasets/remote.py

From c6adb753bb89349d26a341d2675886cb32848555 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 12:16:08 +0200
Subject: [PATCH 25/42] Move test file

---
 .../{test_tutorial.py => datasets/test_remote.py}  | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)
 rename tests/{test_tutorial.py => datasets/test_remote.py} (62%)

diff --git a/tests/test_tutorial.py b/tests/datasets/test_remote.py
similarity index 62%
rename from tests/test_tutorial.py
rename to tests/datasets/test_remote.py
index d25ab0cad4..be2ebd89c4 100644
--- a/tests/test_tutorial.py
+++ b/tests/datasets/test_remote.py
@@ -2,7 +2,7 @@
 import requests
 import xarray as xr
 
-import parcels.tutorial
+import parcels._datasets.remote as remote
 
 
 @pytest.fixture(scope="function", autouse=True)
@@ -11,9 +11,7 @@ def tmp_path_parcels_example_data(monkeypatch, tmp_path):
     return tmp_path
 
 
-@pytest.mark.parametrize(
-    "url", [parcels.tutorial._ODIE.get_url(filename) for filename in parcels.tutorial._ODIE.registry.keys()]
-)
+@pytest.mark.parametrize("url", [remote._ODIE.get_url(filename) for filename in remote._ODIE.registry.keys()])
 def test_pooch_registry_url_reponse(url):
     response = requests.head(url)
     assert not (400 <= response.status_code < 600)
@@ -21,15 +19,15 @@ def test_pooch_registry_url_reponse(url):
 
 def test_open_dataset_non_existing():
     with pytest.raises(ValueError, match="Dataset.*not found"):
-        parcels.tutorial.open_dataset("non_existing_dataset")
+        remote.open_dataset("non_existing_dataset")
 
 
-@pytest.mark.parametrize("name", parcels.tutorial.list_datasets())
+@pytest.mark.parametrize("name", remote.list_datasets())
 def test_open_dataset(name):
-    ds = parcels.tutorial.open_dataset(name)
+    ds = remote.open_dataset(name)
     assert isinstance(ds, xr.Dataset)
 
 
-@pytest.mark.parametrize("name", parcels.tutorial.list_datasets())
+@pytest.mark.parametrize("name", remote.list_datasets())
 def test_dataset_keys(name):
     assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix"

From 3b751590a9b944c5d55a944303e13321010fa2ff Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 12:21:54 +0200
Subject: [PATCH 26/42] Update list_datasets and open_dataset

---
 src/parcels/_datasets/remote.py | 55 +++++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 10 deletions(-)

diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py
index cca64b28ed..331a0f9c25 100644
--- a/src/parcels/_datasets/remote.py
+++ b/src/parcels/_datasets/remote.py
@@ -4,6 +4,7 @@
 from collections.abc import Callable
 from datetime import datetime, timedelta
 from pathlib import Path
+from typing import Literal
 
 import pooch
 import xarray as xr
@@ -174,12 +175,15 @@ def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset:
     return ds
 
 
-# The first here is a human readable key used to open datasets, with an object to open the datasets
-# fmt: off
 class _Purpose(enum.Enum):
-    TESTING = enum.auto()
-    TUTORIAL = enum.auto()
+    TESTING = "testing"
+    TUTORIAL = "tutorial"
+
+
+_TPurpose = Literal["testing", "tutorial"]
 
+# The first here is a human readable key used to open datasets, with an object to open the datasets
+# fmt: off
 _DATASET_KEYS_AND_CONFIGS: dict[str, tuple[_V3Dataset, _Purpose]] = dict([
     ("MovingEddies_data/P", (_V3Dataset("MovingEddies_data/moving_eddiesP.nc"), _Purpose.TUTORIAL)),
     ("MovingEddies_data/U", (_V3Dataset("MovingEddies_data/moving_eddiesU.nc"), _Purpose.TUTORIAL)),
@@ -216,23 +220,54 @@ class _Purpose(enum.Enum):
 # fmt: on
 
 
-def list_datasets() -> list[str]:  # TODO: Remove v4 flag when migrating to open_dataset
-    """List the available example datasets.
+def list_datasets(purpose: _TPurpose | Literal["any"] = "any") -> list[str]:
+    """List the available remote datasets.
 
     Use :func:`open_dataset` to download and open one of the datasets.
 
+    Parameters
+    ----------
+    purpose : {'any', 'testing', 'tutorial'}, optional
+        Filter datasets by purpose. Use ``'any'`` (default) to return all
+        datasets, ``'tutorial'`` for tutorial datasets, or ``'testing'`` for
+        datasets used in tests.
+
     Returns
     -------
     datasets : list of str
-        The names of the available example datasets.
+        The names of the available datasets matching the given purpose.
     """
-    return list(_DATASET_KEYS_AND_CONFIGS.keys())
+    if purpose == "any":
+        return list(_DATASET_KEYS_AND_CONFIGS.keys())
 
+    purpose_enum = _Purpose(purpose)
+    return [k for (k, (_, p)) in _DATASET_KEYS_AND_CONFIGS.items() if p == purpose_enum]
 
-def open_dataset(name: str):
+
+def open_dataset(name: str, purpose: _TPurpose | Literal["any"] = "any"):
+    """Download and open a remote dataset as an :class:`xarray.Dataset`.
+
+    Use :func:`list_datasets` to see the available dataset names.
+
+    Parameters
+    ----------
+    name : str
+        Name of the dataset to open. Must be one of the keys returned by
+        :func:`list_datasets`.
+    purpose : {'any', 'testing', 'tutorial'}, optional
+        Purpose filter used to populate the error message when ``name`` is not
+        found. Defaults to ``'any'``.
+
+    Returns
+    -------
+    xarray.Dataset
+        The requested dataset.
+    """
     try:
         dataset_config = _DATASET_KEYS_AND_CONFIGS[name][0]
     except KeyError as e:
-        raise ValueError(f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_datasets())) from e
+        raise ValueError(
+            f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_datasets(purpose=purpose))
+        ) from e
 
     return dataset_config.open_dataset()

From 89f8202c9f88ee8d1374a883a63be38c1190d8d9 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 12:22:02 +0200
Subject: [PATCH 27/42] Fix registry entry

---
 src/parcels/_datasets/remote.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py
index 331a0f9c25..ac038ef83e 100644
--- a/src/parcels/_datasets/remote.py
+++ b/src/parcels/_datasets/remote.py
@@ -32,7 +32,7 @@
         "data/MovingEddies_data/moving_eddiesV.nc",
     ]
     + ["data/MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"]
-    + ["data/OFAM_example_data/OFAM_simple_U.nc", "OFAM_example_data/OFAM_simple_V.nc"]
+    + ["data/OFAM_example_data/OFAM_simple_U.nc", "data/OFAM_example_data/OFAM_simple_V.nc"]
     + [
         "data/Peninsula_data/peninsulaU.nc",
         "data/Peninsula_data/peninsulaV.nc",

From ec32d2a9764bea6af70d352bedfc0f7f5cea748f Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 12:24:51 +0200
Subject: [PATCH 28/42] Add thin parcels.tutorial wrapper

---
 src/parcels/tutorial.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 src/parcels/tutorial.py

diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
new file mode 100644
index 0000000000..2ac1780b91
--- /dev/null
+++ b/src/parcels/tutorial.py
@@ -0,0 +1,36 @@
+from parcels._datasets.remote import list_datasets as _remote_list_datasets
+from parcels._datasets.remote import open_dataset as _remote_open_dataset
+
+__all__ = ["list_datasets", "open_dataset"]
+
+
+def list_datasets() -> list[str]:
+    """List the available tutorial datasets.
+
+    Use :func:`open_dataset` to download and open one of the datasets.
+
+    Returns
+    -------
+    datasets : list of str
+        The names of the available tutorial datasets.
+    """
+    return _remote_list_datasets(purpose="tutorial")
+
+
+def open_dataset(name: str):
+    """Download and open a tutorial dataset as an :class:`xarray.Dataset`.
+
+    Use :func:`list_datasets` to see the available dataset names.
+
+    Parameters
+    ----------
+    name : str
+        Name of the dataset to open. Must be one of the keys returned by
+        :func:`list_datasets`.
+
+    Returns
+    -------
+    xarray.Dataset
+        The requested dataset.
+    """
+    return _remote_open_dataset(name, purpose="tutorial")

From 04ce890d1f0444c46c759c245b2c86f89a15d2fc Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 12:26:28 +0200
Subject: [PATCH 29/42] Update comment

---
 src/parcels/_datasets/remote.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py
index ac038ef83e..7618c2c053 100644
--- a/src/parcels/_datasets/remote.py
+++ b/src/parcels/_datasets/remote.py
@@ -212,7 +212,7 @@ class _Purpose(enum.Enum):
     ("NemoNorthSeaORCA025-N006_data/V", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"), _Purpose.TUTORIAL)),
     ("NemoNorthSeaORCA025-N006_data/W", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"), _Purpose.TUTORIAL)),
     ("NemoNorthSeaORCA025-N006_data/mesh_mask", (_V3Dataset("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1), _Purpose.TUTORIAL)),
-    # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973
+    # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should not be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973
     ("SWASH_data/data", (_V3Dataset("SWASH_data/field_00655*.nc"), _Purpose.TUTORIAL)),
     ("WOA_data/data", (_V3Dataset("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day), _Purpose.TUTORIAL)),
     ("CROCOidealized_data/data", (_V3Dataset("CROCOidealized_data/CROCO_idealized.nc"), _Purpose.TUTORIAL)),

From 7c107d8d08a006133a66e67ebbbeb33d00b3d47e Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 12:31:15 +0200
Subject: [PATCH 30/42] Add test_list_datasets

---
 tests/datasets/test_remote.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/datasets/test_remote.py b/tests/datasets/test_remote.py
index be2ebd89c4..3aa739c42f 100644
--- a/tests/datasets/test_remote.py
+++ b/tests/datasets/test_remote.py
@@ -31,3 +31,12 @@ def test_open_dataset(name):
 @pytest.mark.parametrize("name", remote.list_datasets())
 def test_dataset_keys(name):
     assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix"
+
+
+def test_list_datasets():
+    tutorial_datasets = set(remote.list_datasets("tutorial"))
+    testing_datasets = set(remote.list_datasets("testing"))
+    all_datasets = set(remote.list_datasets("any"))
+    assert tutorial_datasets.issubset(all_datasets)
+    assert testing_datasets.issubset(all_datasets)
+    assert tutorial_datasets | testing_datasets == all_datasets

From 1cc975027d14df24800986646b13695129146a49 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 13:25:31 +0200
Subject: [PATCH 31/42] Fix relative paths

---
 src/parcels/_datasets/remote.py | 60 ++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py
index 7618c2c053..a96bb7aeaf 100644
--- a/src/parcels/_datasets/remote.py
+++ b/src/parcels/_datasets/remote.py
@@ -185,37 +185,37 @@ class _Purpose(enum.Enum):
 # The first here is a human readable key used to open datasets, with an object to open the datasets
 # fmt: off
 _DATASET_KEYS_AND_CONFIGS: dict[str, tuple[_V3Dataset, _Purpose]] = dict([
-    ("MovingEddies_data/P", (_V3Dataset("MovingEddies_data/moving_eddiesP.nc"), _Purpose.TUTORIAL)),
-    ("MovingEddies_data/U", (_V3Dataset("MovingEddies_data/moving_eddiesU.nc"), _Purpose.TUTORIAL)),
-    ("MovingEddies_data/V", (_V3Dataset("MovingEddies_data/moving_eddiesV.nc"), _Purpose.TUTORIAL)),
-    ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", (_V3Dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"), _Purpose.TUTORIAL)),
-    ("OFAM_example_data/U", (_V3Dataset("OFAM_example_data/OFAM_simple_U.nc"), _Purpose.TUTORIAL)),
-    ("OFAM_example_data/V", (_V3Dataset("OFAM_example_data/OFAM_simple_V.nc"), _Purpose.TUTORIAL)),
-    ("Peninsula_data/U", (_V3Dataset("Peninsula_data/peninsulaU.nc"), _Purpose.TUTORIAL)),
-    ("Peninsula_data/V", (_V3Dataset("Peninsula_data/peninsulaV.nc"), _Purpose.TUTORIAL)),
-    ("Peninsula_data/P", (_V3Dataset("Peninsula_data/peninsulaP.nc"), _Purpose.TUTORIAL)),
-    ("Peninsula_data/T", (_V3Dataset("Peninsula_data/peninsulaT.nc"), _Purpose.TUTORIAL)),
-    ("GlobCurrent_example_data/data", (_V3Dataset("GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat), _Purpose.TUTORIAL)),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", (_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", (_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", (_V3Dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)),
-    ("DecayingMovingEddy_data/U", (_V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyU.nc"), _Purpose.TUTORIAL)),
-    ("DecayingMovingEddy_data/V", (_V3Dataset("DecayingMovingEddy_data/decaying_moving_eddyV.nc"), _Purpose.TUTORIAL)),
-    ("FESOM_periodic_channel/fesom_channel", (_V3Dataset("FESOM_periodic_channel/fesom_channel.nc"), _Purpose.TUTORIAL)),
-    ("FESOM_periodic_channel/u.fesom_channel", (_V3Dataset("FESOM_periodic_channel/u.fesom_channel.nc"), _Purpose.TUTORIAL)),
-    ("FESOM_periodic_channel/v.fesom_channel", (_V3Dataset("FESOM_periodic_channel/v.fesom_channel.nc"), _Purpose.TUTORIAL)),
-    ("FESOM_periodic_channel/w.fesom_channel", (_V3Dataset("FESOM_periodic_channel/w.fesom_channel.nc"), _Purpose.TUTORIAL)),
-    ("NemoCurvilinear_data_zonal/U", (_V3Dataset("NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4"), _Purpose.TUTORIAL)),
-    ("NemoCurvilinear_data_zonal/V", (_V3Dataset("NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4"), _Purpose.TUTORIAL)),
-    ("NemoCurvilinear_data_zonal/mesh_mask", (_V3Dataset("NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2), _Purpose.TUTORIAL)),
-    ("NemoNorthSeaORCA025-N006_data/U", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc"), _Purpose.TUTORIAL)),
-    ("NemoNorthSeaORCA025-N006_data/V", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"), _Purpose.TUTORIAL)),
-    ("NemoNorthSeaORCA025-N006_data/W", (_V3Dataset("NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"), _Purpose.TUTORIAL)),
-    ("NemoNorthSeaORCA025-N006_data/mesh_mask", (_V3Dataset("NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1), _Purpose.TUTORIAL)),
+    ("MovingEddies_data/P", (_V3Dataset("data/MovingEddies_data/moving_eddiesP.nc"), _Purpose.TUTORIAL)),
+    ("MovingEddies_data/U", (_V3Dataset("data/MovingEddies_data/moving_eddiesU.nc"), _Purpose.TUTORIAL)),
+    ("MovingEddies_data/V", (_V3Dataset("data/MovingEddies_data/moving_eddiesV.nc"), _Purpose.TUTORIAL)),
+    ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", (_V3Dataset("data/MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"), _Purpose.TUTORIAL)),
+    ("OFAM_example_data/U", (_V3Dataset("data/OFAM_example_data/OFAM_simple_U.nc"), _Purpose.TUTORIAL)),
+    ("OFAM_example_data/V", (_V3Dataset("data/OFAM_example_data/OFAM_simple_V.nc"), _Purpose.TUTORIAL)),
+    ("Peninsula_data/U", (_V3Dataset("data/Peninsula_data/peninsulaU.nc"), _Purpose.TUTORIAL)),
+    ("Peninsula_data/V", (_V3Dataset("data/Peninsula_data/peninsulaV.nc"), _Purpose.TUTORIAL)),
+    ("Peninsula_data/P", (_V3Dataset("data/Peninsula_data/peninsulaP.nc"), _Purpose.TUTORIAL)),
+    ("Peninsula_data/T", (_V3Dataset("data/Peninsula_data/peninsulaT.nc"), _Purpose.TUTORIAL)),
+    ("GlobCurrent_example_data/data", (_V3Dataset("data/GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat), _Purpose.TUTORIAL)),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)),
+    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)),
+    ("DecayingMovingEddy_data/U", (_V3Dataset("data/DecayingMovingEddy_data/decaying_moving_eddyU.nc"), _Purpose.TUTORIAL)),
+    ("DecayingMovingEddy_data/V", (_V3Dataset("data/DecayingMovingEddy_data/decaying_moving_eddyV.nc"), _Purpose.TUTORIAL)),
+    ("FESOM_periodic_channel/fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/fesom_channel.nc"), _Purpose.TUTORIAL)),
+    ("FESOM_periodic_channel/u.fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/u.fesom_channel.nc"), _Purpose.TUTORIAL)),
+    ("FESOM_periodic_channel/v.fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/v.fesom_channel.nc"), _Purpose.TUTORIAL)),
+    ("FESOM_periodic_channel/w.fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/w.fesom_channel.nc"), _Purpose.TUTORIAL)),
+    ("NemoCurvilinear_data_zonal/U", (_V3Dataset("data/NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4"), _Purpose.TUTORIAL)),
+    ("NemoCurvilinear_data_zonal/V", (_V3Dataset("data/NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4"), _Purpose.TUTORIAL)),
+    ("NemoCurvilinear_data_zonal/mesh_mask", (_V3Dataset("data/NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2), _Purpose.TUTORIAL)),
+    ("NemoNorthSeaORCA025-N006_data/U", (_V3Dataset("data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc"), _Purpose.TUTORIAL)),
+    ("NemoNorthSeaORCA025-N006_data/V", (_V3Dataset("data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"), _Purpose.TUTORIAL)),
+    ("NemoNorthSeaORCA025-N006_data/W", (_V3Dataset("data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"), _Purpose.TUTORIAL)),
+    ("NemoNorthSeaORCA025-N006_data/mesh_mask", (_V3Dataset("data/NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1), _Purpose.TUTORIAL)),
     # "POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should not be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973
-    ("SWASH_data/data", (_V3Dataset("SWASH_data/field_00655*.nc"), _Purpose.TUTORIAL)),
-    ("WOA_data/data", (_V3Dataset("WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day), _Purpose.TUTORIAL)),
-    ("CROCOidealized_data/data", (_V3Dataset("CROCOidealized_data/CROCO_idealized.nc"), _Purpose.TUTORIAL)),
+    ("SWASH_data/data", (_V3Dataset("data/SWASH_data/field_00655*.nc"), _Purpose.TUTORIAL)),
+    ("WOA_data/data", (_V3Dataset("data/WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day), _Purpose.TUTORIAL)),
+    ("CROCOidealized_data/data", (_V3Dataset("data/CROCOidealized_data/CROCO_idealized.nc"), _Purpose.TUTORIAL)),
 ])
 # fmt: on
 

From 38404a38de84bccf8a77df7ce640025f826dd5bd Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 13:45:29 +0200
Subject: [PATCH 32/42] Update references in codebase

---
 docs/getting_started/tutorial_output.ipynb    | 16 +++++++---
 docs/getting_started/tutorial_quickstart.md   | 10 +++---
 .../examples/explanation_kernelloop.md        | 10 +++---
 .../examples/tutorial_Argofloats.ipynb        | 16 +++++++---
 .../examples/tutorial_croco_3D.ipynb          | 12 +------
 .../examples/tutorial_delaystart.ipynb        | 16 +++++++---
 .../examples/tutorial_diffusion.ipynb         | 18 +++++++----
 .../examples/tutorial_dt_integrators.ipynb    | 16 +++++++---
 .../examples/tutorial_gsw_density.ipynb       | 16 +++++++---
 .../tutorial_manipulating_field_data.ipynb    | 16 +++++++---
 .../user_guide/examples/tutorial_mitgcm.ipynb |  5 +--
 docs/user_guide/examples/tutorial_nemo.ipynb  | 32 +++++++++----------
 .../examples/tutorial_sampling.ipynb          | 16 +++++++---
 src/parcels/__init__.py                       |  2 --
 tests/test_uxarray_fieldset.py                | 16 +++++++---
 15 files changed, 136 insertions(+), 81 deletions(-)

diff --git a/docs/getting_started/tutorial_output.ipynb b/docs/getting_started/tutorial_output.ipynb
index 3b9eedec56..6baf5a9438 100644
--- a/docs/getting_started/tutorial_output.ipynb
+++ b/docs/getting_started/tutorial_output.ipynb
@@ -52,11 +52,19 @@
    "outputs": [],
    "source": [
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
-    "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
+    "ds_fields = xr.merge(\n",
+    "    [\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
+    "        ),\n",
+    "    ]\n",
     ")\n",
-    "\n",
-    "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n",
     "ds_fields.load()  # load the dataset into memory\n",
     "\n",
     "# Convert to SGRID-compliant dataset and create FieldSet\n",
diff --git a/docs/getting_started/tutorial_quickstart.md b/docs/getting_started/tutorial_quickstart.md
index c806311e2c..4c0bf02ac9 100644
--- a/docs/getting_started/tutorial_quickstart.md
+++ b/docs/getting_started/tutorial_quickstart.md
@@ -30,11 +30,11 @@ hydrodynamics fields in which the particles are tracked. Here we provide an exam
 [Global Ocean Physics Reanalysis](https://doi.org/10.48670/moi-00021) from the Copernicus Marine Service.
 
 ```{code-cell}
-example_dataset_folder = parcels.tutorial.download_example_dataset(
-    "CopernicusMarine_data_for_Argo_tutorial"
-)
-
-ds_fields = xr.open_mfdataset(f"{example_dataset_folder}/*.nc", combine="by_coords")
+ds_fields = xr.merge([
+    parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc"),
+    parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc"),
+    parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc"),
+])
 ds_fields.load()  # load the dataset into memory
 ds_fields
 ```
diff --git a/docs/user_guide/examples/explanation_kernelloop.md b/docs/user_guide/examples/explanation_kernelloop.md
index 582be17ed7..994cbd6228 100644
--- a/docs/user_guide/examples/explanation_kernelloop.md
+++ b/docs/user_guide/examples/explanation_kernelloop.md
@@ -56,11 +56,11 @@ import parcels
 import parcels.tutorial
 
 # Load the CopernicusMarine data in the Agulhas region from the example_datasets
-example_dataset_folder = parcels.tutorial.download_example_dataset(
-    "CopernicusMarine_data_for_Argo_tutorial"
-)
-
-ds_fields = xr.open_mfdataset(f"{example_dataset_folder}/*.nc", combine="by_coords")
+ds_fields = xr.merge([
+    parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc"),
+    parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc"),
+    parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc"),
+])
 ds_fields.load()  # load the dataset into memory
 
 # Create an idealised wind field and add it to the dataset
diff --git a/docs/user_guide/examples/tutorial_Argofloats.ipynb b/docs/user_guide/examples/tutorial_Argofloats.ipynb
index d0b487a24b..232c3c8b60 100644
--- a/docs/user_guide/examples/tutorial_Argofloats.ipynb
+++ b/docs/user_guide/examples/tutorial_Argofloats.ipynb
@@ -113,12 +113,20 @@
     "import parcels.tutorial\n",
     "\n",
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
-    "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
+    "ds_fields = xr.merge(\n",
+    "    [\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
+    "        ),\n",
+    "    ]\n",
     ")\n",
     "\n",
-    "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n",
-    "\n",
     "# TODO check how we can get good performance without loading full dataset in memory\n",
     "ds_fields.load()  # load the dataset into memory\n",
     "\n",
diff --git a/docs/user_guide/examples/tutorial_croco_3D.ipynb b/docs/user_guide/examples/tutorial_croco_3D.ipynb
index 515a5cde08..7ea1424398 100644
--- a/docs/user_guide/examples/tutorial_croco_3D.ipynb
+++ b/docs/user_guide/examples/tutorial_croco_3D.ipynb
@@ -34,17 +34,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import numpy as np\n",
-    "import xarray as xr\n",
-    "\n",
-    "import parcels\n",
-    "import parcels.tutorial\n",
-    "\n",
-    "data_folder = parcels.tutorial.download_example_dataset(\"CROCOidealized_data\")\n",
-    "ds_fields = xr.open_dataset(data_folder / \"CROCO_idealized.nc\")\n",
-    "\n",
-    "ds_fields.load();  # Preload data to speed up access"
+    "import matplotlib.pyplot as plt\nimport numpy as np\nimport xarray as xr\n\nimport parcels\nimport parcels.tutorial\n\nds_fields = parcels.tutorial.open_dataset(\"CROCOidealized_data/data\")\n\nds_fields.load();  # Preload data to speed up access"
    ]
   },
   {
diff --git a/docs/user_guide/examples/tutorial_delaystart.ipynb b/docs/user_guide/examples/tutorial_delaystart.ipynb
index 5ca66f4cd9..57d4eb6296 100644
--- a/docs/user_guide/examples/tutorial_delaystart.ipynb
+++ b/docs/user_guide/examples/tutorial_delaystart.ipynb
@@ -51,11 +51,19 @@
    "outputs": [],
    "source": [
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
-    "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
+    "ds_fields = xr.merge(\n",
+    "    [\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
+    "        ),\n",
+    "    ]\n",
     ")\n",
-    "\n",
-    "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n",
     "ds_fields.load()  # load the dataset into memory\n",
     "\n",
     "# Convert to SGRID-compliant dataset and create FieldSet\n",
diff --git a/docs/user_guide/examples/tutorial_diffusion.ipynb b/docs/user_guide/examples/tutorial_diffusion.ipynb
index 992dcdfa55..ef2fba7611 100644
--- a/docs/user_guide/examples/tutorial_diffusion.ipynb
+++ b/docs/user_guide/examples/tutorial_diffusion.ipynb
@@ -467,12 +467,18 @@
    "outputs": [],
    "source": [
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
-    "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
-    ")\n",
-    "\n",
-    "ds_fields = xr.open_mfdataset(\n",
-    "    f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\"\n",
+    "ds_fields = xr.merge(\n",
+    "    [\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
+    "        ),\n",
+    "    ]\n",
     ").isel(depth=slice(0, 1))\n",
     "ds_fields.load()  # load the dataset into memory"
    ]
diff --git a/docs/user_guide/examples/tutorial_dt_integrators.ipynb b/docs/user_guide/examples/tutorial_dt_integrators.ipynb
index 3ac6be4e13..6dd0ed9563 100644
--- a/docs/user_guide/examples/tutorial_dt_integrators.ipynb
+++ b/docs/user_guide/examples/tutorial_dt_integrators.ipynb
@@ -62,11 +62,19 @@
     "import parcels.tutorial\n",
     "\n",
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
-    "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
+    "ds_fields = xr.merge(\n",
+    "    [\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
+    "        ),\n",
+    "    ]\n",
     ")\n",
-    "\n",
-    "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n",
     "ds_fields.load()  # load the dataset into memory\n",
     "\n",
     "# Convert to SGRID-compliant dataset and create FieldSet\n",
diff --git a/docs/user_guide/examples/tutorial_gsw_density.ipynb b/docs/user_guide/examples/tutorial_gsw_density.ipynb
index 22bbba4ed3..5528051e32 100644
--- a/docs/user_guide/examples/tutorial_gsw_density.ipynb
+++ b/docs/user_guide/examples/tutorial_gsw_density.ipynb
@@ -39,12 +39,20 @@
     "import parcels.tutorial\n",
     "\n",
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
-    "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
+    "ds_fields = xr.merge(\n",
+    "    [\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
+    "        ),\n",
+    "    ]\n",
     ")\n",
     "\n",
-    "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n",
-    "\n",
     "# TODO check how we can get good performance without loading full dataset in memory\n",
     "ds_fields.load()  # load the dataset into memory\n",
     "\n",
diff --git a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb
index dc17a0d6d0..cf085ade90 100644
--- a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb
+++ b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb
@@ -46,11 +46,19 @@
     "import parcels.tutorial\n",
     "\n",
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
-    "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
+    "ds_fields = xr.merge(\n",
+    "    [\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
+    "        ),\n",
+    "    ]\n",
     ")\n",
-    "\n",
-    "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n",
     "ds_fields.load()  # load the dataset into memory\n",
     "\n",
     "# Create an idealised wind field and add it to the dataset\n",
diff --git a/docs/user_guide/examples/tutorial_mitgcm.ipynb b/docs/user_guide/examples/tutorial_mitgcm.ipynb
index 845d15e361..ae46c7fce2 100644
--- a/docs/user_guide/examples/tutorial_mitgcm.ipynb
+++ b/docs/user_guide/examples/tutorial_mitgcm.ipynb
@@ -30,8 +30,9 @@
     "import parcels\n",
     "import parcels.tutorial\n",
     "\n",
-    "data_folder = parcels.tutorial.download_example_dataset(\"MITgcm_example_data\")\n",
-    "ds_fields = xr.open_dataset(data_folder / \"mitgcm_UV_surface_zonally_reentrant.nc\")"
+    "ds_fields = parcels.tutorial.open_dataset(\n",
+    "    \"MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant\"\n",
+    ")"
    ]
   },
   {
diff --git a/docs/user_guide/examples/tutorial_nemo.ipynb b/docs/user_guide/examples/tutorial_nemo.ipynb
index 5f27d2af65..0824bd54e0 100644
--- a/docs/user_guide/examples/tutorial_nemo.ipynb
+++ b/docs/user_guide/examples/tutorial_nemo.ipynb
@@ -66,15 +66,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "data_folder = parcels.tutorial.download_example_dataset(\"NemoCurvilinear_data\")\n",
-    "ds_fields = xr.open_mfdataset(\n",
-    "    data_folder.glob(\"*.nc4\"),\n",
-    "    data_vars=\"minimal\",\n",
-    "    coords=\"minimal\",\n",
-    "    compat=\"override\",\n",
-    ")\n",
-    "\n",
-    "ds_coords = xr.open_dataset(data_folder / \"mesh_mask.nc4\", decode_times=False)\n",
+    "ds_fields = xr.merge(\n",
+    "    [\n",
+    "        parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/U\"),\n",
+    "        parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/V\"),\n",
+    "    ]\n",
+    ")  # TODO: Fix grid staggering (using SGRID metadata)\n",
+    "ds_coords = parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/mesh_mask\")\n",
     "ds_fset = parcels.convert.nemo_to_sgrid(\n",
     "    fields=dict(U=ds_fields[\"U\"], V=ds_fields[\"V\"]), coords=ds_coords\n",
     ")\n",
@@ -291,14 +289,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "data_folder = parcels.tutorial.download_example_dataset(\"NemoNorthSeaORCA025-N006_data\")\n",
-    "ds_fields = xr.open_mfdataset(\n",
-    "    data_folder.glob(\"ORCA*.nc\"),\n",
-    "    data_vars=\"minimal\",\n",
-    "    coords=\"minimal\",\n",
-    "    compat=\"override\",\n",
-    ")\n",
-    "ds_coords = xr.open_dataset(data_folder / \"coordinates.nc\", decode_times=False)\n",
+    "ds_fields = xr.merge(\n",
+    "    [\n",
+    "        parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/U\"),\n",
+    "        parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/V\"),\n",
+    "        parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/W\"),\n",
+    "    ]\n",
+    ")  # TODO: Fix grid staggering (using SGRID metadata)\n",
+    "ds_coords = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/mesh_mask\")\n",
     "ds_fset = parcels.convert.nemo_to_sgrid(\n",
     "    fields={\"U\": ds_fields[\"uo\"], \"V\": ds_fields[\"vo\"], \"W\": ds_fields[\"wo\"]},\n",
     "    coords=ds_coords,\n",
diff --git a/docs/user_guide/examples/tutorial_sampling.ipynb b/docs/user_guide/examples/tutorial_sampling.ipynb
index 1867a5b1ca..8cdb02fc11 100644
--- a/docs/user_guide/examples/tutorial_sampling.ipynb
+++ b/docs/user_guide/examples/tutorial_sampling.ipynb
@@ -59,11 +59,19 @@
    "outputs": [],
    "source": [
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "example_dataset_folder = parcels.tutorial.download_example_dataset(\n",
-    "    \"CopernicusMarine_data_for_Argo_tutorial\"\n",
+    "ds_fields = xr.merge(\n",
+    "    [\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
+    "        ),\n",
+    "        parcels.tutorial.open_dataset(\n",
+    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
+    "        ),\n",
+    "    ]\n",
     ")\n",
-    "\n",
-    "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n",
     "ds_fields.load()  # load the dataset into memory\n",
     "\n",
     "# Convert to SGRID-compliant dataset and create FieldSet\n",
diff --git a/src/parcels/__init__.py b/src/parcels/__init__.py
index 5330b6212a..2a7854cded 100644
--- a/src/parcels/__init__.py
+++ b/src/parcels/__init__.py
@@ -67,8 +67,6 @@
     "ParticleSetWarning",
     # Utilities
     "logger",
-    "download_example_dataset",
-    "list_example_datasets",
 ]
 
 _stdlib_warnings.warn(
diff --git a/tests/test_uxarray_fieldset.py b/tests/test_uxarray_fieldset.py
index d4a11f550b..8393424ee9 100644
--- a/tests/test_uxarray_fieldset.py
+++ b/tests/test_uxarray_fieldset.py
@@ -1,7 +1,10 @@
+from pathlib import Path
+
 import numpy as np
 import pytest
 import uxarray as ux
 
+import parcels._datasets.remote as _parcels_remote
 import parcels.tutorial
 from parcels import (
     Field,
@@ -22,12 +25,15 @@
 
 @pytest.fixture
 def ds_fesom_channel() -> ux.UxDataset:
-    fesom_path = parcels.tutorial.download_example_dataset("FESOM_periodic_channel")
-    grid_path = f"{fesom_path}/fesom_channel.nc"
+    # Download FESOM files via the new tutorial API
+    parcels.tutorial.open_dataset("FESOM_periodic_channel/fesom_channel")
+    # uxarray requires file paths; access the downloaded files from the pooch cache
+    _fesom_dir = Path(_parcels_remote._DATA_HOME) / "data" / "FESOM_periodic_channel"
+    grid_path = str(_fesom_dir / "fesom_channel.nc")
     data_path = [
-        f"{fesom_path}/u.fesom_channel.nc",
-        f"{fesom_path}/v.fesom_channel.nc",
-        f"{fesom_path}/w.fesom_channel.nc",
+        str(_fesom_dir / "u.fesom_channel.nc"),
+        str(_fesom_dir / "v.fesom_channel.nc"),
+        str(_fesom_dir / "w.fesom_channel.nc"),
     ]
     ds = ux.open_mfdataset(grid_path, data_path).rename_vars({"u": "U", "v": "V", "w": "W"})
     ds = fesom_to_ugrid(ds)

From d6e298d5eb28f734e8c068cc4228c6dc56999b52 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 15:21:07 +0200
Subject: [PATCH 33/42] Update function names

---
 src/parcels/_datasets/remote.py |  8 +++-----
 src/parcels/tutorial.py         |  8 ++++----
 tests/datasets/test_remote.py   | 14 +++++++-------
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py
index a96bb7aeaf..2dcc1ee59f 100644
--- a/src/parcels/_datasets/remote.py
+++ b/src/parcels/_datasets/remote.py
@@ -11,8 +11,6 @@
 
 from parcels._v3to4 import patch_dataset_v4_compat
 
-__all__ = ["list_datasets", "open_dataset"]
-
 # When modifying existing datasets in a backwards incompatible way,
 # make a new release in the repo and update the DATA_REPO_TAG to the new tag
 _DATA_REPO_TAG = "main"
@@ -220,7 +218,7 @@ class _Purpose(enum.Enum):
 # fmt: on
 
 
-def list_datasets(purpose: _TPurpose | Literal["any"] = "any") -> list[str]:
+def list_remote_datasets(purpose: _TPurpose | Literal["any"] = "any") -> list[str]:
     """List the available remote datasets.
 
     Use :func:`open_dataset` to download and open one of the datasets.
@@ -244,7 +242,7 @@ def list_datasets(purpose: _TPurpose | Literal["any"] = "any") -> list[str]:
     return [k for (k, (_, p)) in _DATASET_KEYS_AND_CONFIGS.items() if p == purpose_enum]
 
 
-def open_dataset(name: str, purpose: _TPurpose | Literal["any"] = "any"):
+def open_remote_dataset(name: str, purpose: _TPurpose | Literal["any"] = "any"):
     """Download and open a remote dataset as an :class:`xarray.Dataset`.
 
     Use :func:`list_datasets` to see the available dataset names.
@@ -267,7 +265,7 @@ def open_dataset(name: str, purpose: _TPurpose | Literal["any"] = "any"):
         dataset_config = _DATASET_KEYS_AND_CONFIGS[name][0]
     except KeyError as e:
         raise ValueError(
-            f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_datasets(purpose=purpose))
+            f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_remote_datasets(purpose=purpose))
         ) from e
 
     return dataset_config.open_dataset()
diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py
index 2ac1780b91..49a4a8b6bf 100644
--- a/src/parcels/tutorial.py
+++ b/src/parcels/tutorial.py
@@ -1,5 +1,5 @@
-from parcels._datasets.remote import list_datasets as _remote_list_datasets
-from parcels._datasets.remote import open_dataset as _remote_open_dataset
+from parcels._datasets.remote import list_remote_datasets as _list_remote_datasets
+from parcels._datasets.remote import open_remote_dataset as _open_remote_dataset
 
 __all__ = ["list_datasets", "open_dataset"]
 
@@ -14,7 +14,7 @@ def list_datasets() -> list[str]:
     datasets : list of str
         The names of the available datasets matching the given purpose.
     """
-    return _remote_list_datasets(purpose="tutorial")
+    return _list_remote_datasets(purpose="tutorial")
 
 
 def open_dataset(name: str):
@@ -33,4 +33,4 @@ def open_dataset(name: str):
     xarray.Dataset
         The requested dataset.
     """
-    return _remote_open_dataset(name, purpose="tutorial")
+    return _open_remote_dataset(name, purpose="tutorial")
diff --git a/tests/datasets/test_remote.py b/tests/datasets/test_remote.py
index 3aa739c42f..856752d018 100644
--- a/tests/datasets/test_remote.py
+++ b/tests/datasets/test_remote.py
@@ -19,24 +19,24 @@ def test_pooch_registry_url_reponse(url):
 
 def test_open_dataset_non_existing():
     with pytest.raises(ValueError, match="Dataset.*not found"):
-        remote.open_dataset("non_existing_dataset")
+        remote.open_remote_dataset("non_existing_dataset")
 
 
-@pytest.mark.parametrize("name", remote.list_datasets())
+@pytest.mark.parametrize("name", remote.list_remote_datasets())
 def test_open_dataset(name):
-    ds = remote.open_dataset(name)
+    ds = remote.open_remote_dataset(name)
     assert isinstance(ds, xr.Dataset)
 
 
-@pytest.mark.parametrize("name", remote.list_datasets())
+@pytest.mark.parametrize("name", remote.list_remote_datasets())
 def test_dataset_keys(name):
     assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix"
 
 
 def test_list_datasets():
-    tutorial_datasets = set(remote.list_datasets("tutorial"))
-    testing_datasets = set(remote.list_datasets("testing"))
-    all_datasets = set(remote.list_datasets("any"))
+    tutorial_datasets = set(remote.list_remote_datasets("tutorial"))
+    testing_datasets = set(remote.list_remote_datasets("testing"))
+    all_datasets = set(remote.list_remote_datasets("any"))
     assert tutorial_datasets.issubset(all_datasets)
     assert testing_datasets.issubset(all_datasets)
     assert tutorial_datasets | testing_datasets == all_datasets

From e21b024bdabaa8eb67cb33963255a8fbf8f51c83 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 15:48:20 +0200
Subject: [PATCH 34/42] Update options to open_mfdataset

---
 src/parcels/_datasets/remote.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py
index 2dcc1ee59f..72ddf8a103 100644
--- a/src/parcels/_datasets/remote.py
+++ b/src/parcels/_datasets/remote.py
@@ -131,7 +131,15 @@ def __init__(self, path_relative_to_root: str, pre_decode_cf_callable=None):
     def open_dataset(self) -> xr.Dataset:
         self.download_relevant_files()
         with xr.set_options(use_new_combine_kwarg_defaults=True):
-            ds = xr.open_mfdataset(f"{self.pup.path}/{self.path_relative_to_root}", decode_cf=False)
+            ds = xr.open_mfdataset(
+                f"{self.pup.path}/{self.path_relative_to_root}",
+                decode_cf=False,
+                # options to open mfdataset https://github.com/Parcels-code/Parcels/pull/2574#discussion_r3073256988
+                combine="nested",
+                data_vars="minimal",
+                coords="minimal",
+                compat="override",
+            )
 
         if self.pre_decode_cf_callable is not None:
             ds = self.pre_decode_cf_callable(ds)

From dc652da3ee12716c43989bb0cc7d5a1aac51d8f4 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 15:55:08 +0200
Subject: [PATCH 35/42] Fix imports in notebooks

---
 docs/getting_started/tutorial_output.ipynb        | 7 ++++---
 docs/user_guide/examples/tutorial_diffusion.ipynb | 3 ++-
 docs/user_guide/examples/tutorial_nemo.ipynb      | 3 ++-
 docs/user_guide/examples/tutorial_sampling.ipynb  | 3 ++-
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/docs/getting_started/tutorial_output.ipynb b/docs/getting_started/tutorial_output.ipynb
index 6baf5a9438..4831a22791 100644
--- a/docs/getting_started/tutorial_output.ipynb
+++ b/docs/getting_started/tutorial_output.ipynb
@@ -35,7 +35,8 @@
     "import numpy as np\n",
     "import xarray as xr\n",
     "\n",
-    "import parcels"
+    "import parcels\n",
+    "import parcels.tutorial"
    ]
   },
   {
@@ -566,7 +567,7 @@
  "metadata": {
   "celltoolbar": "Metagegevens bewerken",
   "kernelspec": {
-   "display_name": "test-notebooks",
+   "display_name": "default",
    "language": "python",
    "name": "python3"
   },
@@ -580,7 +581,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.14.2"
+   "version": "3.14.3"
   }
  },
  "nbformat": 4,
diff --git a/docs/user_guide/examples/tutorial_diffusion.ipynb b/docs/user_guide/examples/tutorial_diffusion.ipynb
index ef2fba7611..43f6636d52 100644
--- a/docs/user_guide/examples/tutorial_diffusion.ipynb
+++ b/docs/user_guide/examples/tutorial_diffusion.ipynb
@@ -119,7 +119,8 @@
     "import trajan as ta\n",
     "import xarray as xr\n",
     "\n",
-    "import parcels"
+    "import parcels\n",
+    "import parcels.tutorial"
    ]
   },
   {
diff --git a/docs/user_guide/examples/tutorial_nemo.ipynb b/docs/user_guide/examples/tutorial_nemo.ipynb
index 0824bd54e0..9d31953d14 100644
--- a/docs/user_guide/examples/tutorial_nemo.ipynb
+++ b/docs/user_guide/examples/tutorial_nemo.ipynb
@@ -50,7 +50,8 @@
     "import numpy as np\n",
     "import xarray as xr\n",
     "\n",
-    "import parcels"
+    "import parcels\n",
+    "import parcels.tutorial"
    ]
   },
   {
diff --git a/docs/user_guide/examples/tutorial_sampling.ipynb b/docs/user_guide/examples/tutorial_sampling.ipynb
index 8cdb02fc11..b2f1d9db58 100644
--- a/docs/user_guide/examples/tutorial_sampling.ipynb
+++ b/docs/user_guide/examples/tutorial_sampling.ipynb
@@ -41,7 +41,8 @@
     "# To open and look at the temperature data\n",
     "import xarray as xr\n",
     "\n",
-    "import parcels"
+    "import parcels\n",
+    "import parcels.tutorial"
    ]
   },
   {

From 472930d8398f9c87c0febbee25f04ddaa5f1c1ca Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 15:57:25 +0200
Subject: [PATCH 36/42] Remove combine="nested"

Use the default combine="by_coords" instead
---
 src/parcels/_datasets/remote.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py
index 72ddf8a103..3b9d9c7185 100644
--- a/src/parcels/_datasets/remote.py
+++ b/src/parcels/_datasets/remote.py
@@ -135,7 +135,6 @@ def open_dataset(self) -> xr.Dataset:
                 f"{self.pup.path}/{self.path_relative_to_root}",
                 decode_cf=False,
                 # options to open mfdataset https://github.com/Parcels-code/Parcels/pull/2574#discussion_r3073256988
-                combine="nested",
                 data_vars="minimal",
                 coords="minimal",
                 compat="override",

From 9f7d78f0a772855ac129620e4217daa55a32902b Mon Sep 17 00:00:00 2001
From: Nick Hodgskin <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 16:21:26 +0200
Subject: [PATCH 37/42] Update src/parcels/_datasets/remote.py

Co-authored-by: Erik van Sebille <e.vansebille@uu.nl>
---
 src/parcels/_datasets/remote.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py
index 3b9d9c7185..8df039f58e 100644
--- a/src/parcels/_datasets/remote.py
+++ b/src/parcels/_datasets/remote.py
@@ -164,7 +164,7 @@ def open_dataset(self) -> xr.Dataset:
 
 
 def _preprocess_drop_time_from_mesh1(ds: xr.Dataset) -> xr.Dataset:
-    # For some reason on the mesh "NemoNorthSeaORCA025-N006_data/coordinates.nc" there are time dimensions. These dimension also has broken cf-time metadata
+    # For some reason on the mesh "NemoNorthSeaORCA025-N006_data/coordinates.nc" there are two time dimensions (of length 1). These dimensions also have broken cf-time metadata
     # this fixes that
     return ds.isel(time=0).drop(["time", "time_steps"])
 

From feb67c1238f5213bfefe6f56b11152335d30a929 Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 16:33:39 +0200
Subject: [PATCH 38/42] Update copernicusmarine example dataset

---
 docs/getting_started/tutorial_output.ipynb         | 14 ++------------
 docs/getting_started/tutorial_quickstart.md        |  7 ++-----
 docs/user_guide/examples/explanation_kernelloop.md |  6 +-----
 docs/user_guide/examples/tutorial_Argofloats.ipynb | 14 ++------------
 docs/user_guide/examples/tutorial_delaystart.ipynb | 14 ++------------
 docs/user_guide/examples/tutorial_diffusion.ipynb  | 14 ++------------
 .../examples/tutorial_dt_integrators.ipynb         | 14 ++------------
 .../user_guide/examples/tutorial_gsw_density.ipynb | 14 ++------------
 .../tutorial_manipulating_field_data.ipynb         | 14 ++------------
 docs/user_guide/examples/tutorial_sampling.ipynb   | 14 ++------------
 src/parcels/_datasets/remote.py                    |  4 +---
 tests/test_particlefile.py                         |  2 +-
 12 files changed, 21 insertions(+), 110 deletions(-)

diff --git a/docs/getting_started/tutorial_output.ipynb b/docs/getting_started/tutorial_output.ipynb
index 4831a22791..c3dbba8521 100644
--- a/docs/getting_started/tutorial_output.ipynb
+++ b/docs/getting_started/tutorial_output.ipynb
@@ -53,18 +53,8 @@
    "outputs": [],
    "source": [
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "ds_fields = xr.merge(\n",
-    "    [\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
-    "        ),\n",
-    "    ]\n",
+    "ds_fields = parcels.tutorial.open_dataset(\n",
+    "    \"CopernicusMarine_data_for_Argo_tutorial/data\"\n",
     ")\n",
     "ds_fields.load()  # load the dataset into memory\n",
     "\n",
diff --git a/docs/getting_started/tutorial_quickstart.md b/docs/getting_started/tutorial_quickstart.md
index 4c0bf02ac9..cf17e4bb40 100644
--- a/docs/getting_started/tutorial_quickstart.md
+++ b/docs/getting_started/tutorial_quickstart.md
@@ -30,11 +30,8 @@ hydrodynamics fields in which the particles are tracked. Here we provide an exam
 [Global Ocean Physics Reanalysis](https://doi.org/10.48670/moi-00021) from the Copernicus Marine Service.
 
 ```{code-cell}
-ds_fields = xr.merge([
-    parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc"),
-    parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc"),
-    parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc"),
-])
+ds_fields = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/data")
+
 ds_fields.load()  # load the dataset into memory
 ds_fields
 ```
diff --git a/docs/user_guide/examples/explanation_kernelloop.md b/docs/user_guide/examples/explanation_kernelloop.md
index 994cbd6228..c4a9a58d12 100644
--- a/docs/user_guide/examples/explanation_kernelloop.md
+++ b/docs/user_guide/examples/explanation_kernelloop.md
@@ -56,11 +56,7 @@ import parcels
 import parcels.tutorial
 
 # Load the CopernicusMarine data in the Agulhas region from the example_datasets
-ds_fields = xr.merge([
-    parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc"),
-    parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc"),
-    parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc"),
-])
+ds_fields = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/data")
 ds_fields.load()  # load the dataset into memory
 
 # Create an idealised wind field and add it to the dataset
diff --git a/docs/user_guide/examples/tutorial_Argofloats.ipynb b/docs/user_guide/examples/tutorial_Argofloats.ipynb
index 232c3c8b60..a96e7bcb6d 100644
--- a/docs/user_guide/examples/tutorial_Argofloats.ipynb
+++ b/docs/user_guide/examples/tutorial_Argofloats.ipynb
@@ -113,18 +113,8 @@
     "import parcels.tutorial\n",
     "\n",
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "ds_fields = xr.merge(\n",
-    "    [\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
-    "        ),\n",
-    "    ]\n",
+    "ds_fields = parcels.tutorial.open_dataset(\n",
+    "    \"CopernicusMarine_data_for_Argo_tutorial/data\"\n",
     ")\n",
     "\n",
     "# TODO check how we can get good performance without loading full dataset in memory\n",
diff --git a/docs/user_guide/examples/tutorial_delaystart.ipynb b/docs/user_guide/examples/tutorial_delaystart.ipynb
index 57d4eb6296..8bb3ffd956 100644
--- a/docs/user_guide/examples/tutorial_delaystart.ipynb
+++ b/docs/user_guide/examples/tutorial_delaystart.ipynb
@@ -51,18 +51,8 @@
    "outputs": [],
    "source": [
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "ds_fields = xr.merge(\n",
-    "    [\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
-    "        ),\n",
-    "    ]\n",
+    "ds_fields = parcels.tutorial.open_dataset(\n",
+    "    \"CopernicusMarine_data_for_Argo_tutorial/data\"\n",
     ")\n",
     "ds_fields.load()  # load the dataset into memory\n",
     "\n",
diff --git a/docs/user_guide/examples/tutorial_diffusion.ipynb b/docs/user_guide/examples/tutorial_diffusion.ipynb
index 43f6636d52..5010f84067 100644
--- a/docs/user_guide/examples/tutorial_diffusion.ipynb
+++ b/docs/user_guide/examples/tutorial_diffusion.ipynb
@@ -468,18 +468,8 @@
    "outputs": [],
    "source": [
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "ds_fields = xr.merge(\n",
-    "    [\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
-    "        ),\n",
-    "    ]\n",
+    "ds_fields = parcels.tutorial.open_dataset(\n",
+    "    \"CopernicusMarine_data_for_Argo_tutorial/data\"\n",
     ").isel(depth=slice(0, 1))\n",
     "ds_fields.load()  # load the dataset into memory"
    ]
diff --git a/docs/user_guide/examples/tutorial_dt_integrators.ipynb b/docs/user_guide/examples/tutorial_dt_integrators.ipynb
index 6dd0ed9563..bd4d93de54 100644
--- a/docs/user_guide/examples/tutorial_dt_integrators.ipynb
+++ b/docs/user_guide/examples/tutorial_dt_integrators.ipynb
@@ -62,18 +62,8 @@
     "import parcels.tutorial\n",
     "\n",
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "ds_fields = xr.merge(\n",
-    "    [\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
-    "        ),\n",
-    "    ]\n",
+    "ds_fields = parcels.tutorial.open_dataset(\n",
+    "    \"CopernicusMarine_data_for_Argo_tutorial/data\"\n",
     ")\n",
     "ds_fields.load()  # load the dataset into memory\n",
     "\n",
diff --git a/docs/user_guide/examples/tutorial_gsw_density.ipynb b/docs/user_guide/examples/tutorial_gsw_density.ipynb
index 5528051e32..fe089bbe13 100644
--- a/docs/user_guide/examples/tutorial_gsw_density.ipynb
+++ b/docs/user_guide/examples/tutorial_gsw_density.ipynb
@@ -39,18 +39,8 @@
     "import parcels.tutorial\n",
     "\n",
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "ds_fields = xr.merge(\n",
-    "    [\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
-    "        ),\n",
-    "    ]\n",
+    "ds_fields = parcels.tutorial.open_dataset(\n",
+    "    \"CopernicusMarine_data_for_Argo_tutorial/data\"\n",
     ")\n",
     "\n",
     "# TODO check how we can get good performance without loading full dataset in memory\n",
diff --git a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb
index cf085ade90..4196ba4a5f 100644
--- a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb
+++ b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb
@@ -46,18 +46,8 @@
     "import parcels.tutorial\n",
     "\n",
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "ds_fields = xr.merge(\n",
-    "    [\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
-    "        ),\n",
-    "    ]\n",
+    "ds_fields = parcels.tutorial.open_dataset(\n",
+    "    \"CopernicusMarine_data_for_Argo_tutorial/data\"\n",
     ")\n",
     "ds_fields.load()  # load the dataset into memory\n",
     "\n",
diff --git a/docs/user_guide/examples/tutorial_sampling.ipynb b/docs/user_guide/examples/tutorial_sampling.ipynb
index b2f1d9db58..87855debe5 100644
--- a/docs/user_guide/examples/tutorial_sampling.ipynb
+++ b/docs/user_guide/examples/tutorial_sampling.ipynb
@@ -60,18 +60,8 @@
    "outputs": [],
    "source": [
     "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n",
-    "ds_fields = xr.merge(\n",
-    "    [\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc\"\n",
-    "        ),\n",
-    "        parcels.tutorial.open_dataset(\n",
-    "            \"CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc\"\n",
-    "        ),\n",
-    "    ]\n",
+    "ds_fields = parcels.tutorial.open_dataset(\n",
+    "    \"CopernicusMarine_data_for_Argo_tutorial/data\"\n",
     ")\n",
     "ds_fields.load()  # load the dataset into memory\n",
     "\n",
diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py
index 8df039f58e..14c30cd112 100644
--- a/src/parcels/_datasets/remote.py
+++ b/src/parcels/_datasets/remote.py
@@ -201,9 +201,7 @@ class _Purpose(enum.Enum):
     ("Peninsula_data/P", (_V3Dataset("data/Peninsula_data/peninsulaP.nc"), _Purpose.TUTORIAL)),
     ("Peninsula_data/T", (_V3Dataset("data/Peninsula_data/peninsulaT.nc"), _Purpose.TUTORIAL)),
     ("GlobCurrent_example_data/data", (_V3Dataset("data/GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat), _Purpose.TUTORIAL)),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)),
-    ("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc"), _Purpose.TUTORIAL)),
+    ("CopernicusMarine_data_for_Argo_tutorial/data", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-*.nc"), _Purpose.TUTORIAL)),
     ("DecayingMovingEddy_data/U", (_V3Dataset("data/DecayingMovingEddy_data/decaying_moving_eddyU.nc"), _Purpose.TUTORIAL)),
     ("DecayingMovingEddy_data/V", (_V3Dataset("data/DecayingMovingEddy_data/decaying_moving_eddyV.nc"), _Purpose.TUTORIAL)),
     ("FESOM_periodic_channel/fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/fesom_channel.nc"), _Purpose.TUTORIAL)),
diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py
index ef4d1b565b..d5ccb4c1a8 100755
--- a/tests/test_particlefile.py
+++ b/tests/test_particlefile.py
@@ -442,7 +442,7 @@ def test_pset_execute_outputdt_backwards_fieldset_timevarying():
     dt = -timedelta(minutes=5)
 
     # TODO: Not ideal using the `open_dataset` here, but I'm struggling to recreate this error using the test suite fieldsets we have
-    ds_in = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc")
+    ds_in = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/data")
     fields = {"U": ds_in["uo"], "V": ds_in["vo"]}
     ds_fset = copernicusmarine_to_sgrid(fields=fields)
     fieldset = FieldSet.from_sgrid_conventions(ds_fset)

From 7b05a5414a72920db5776b8310a7d96d2a78abce Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 17:11:34 +0200
Subject: [PATCH 39/42] Bump minimum Xarray dependency and transitive deps

option use_new_combine_kwarg_defaults was introduced in 2025.8.0
---
 .github/ci/recipe.yaml | 2 +-
 pixi.toml              | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/ci/recipe.yaml b/.github/ci/recipe.yaml
index 7416a211d8..fb77725e40 100644
--- a/.github/ci/recipe.yaml
+++ b/.github/ci/recipe.yaml
@@ -35,7 +35,7 @@ requirements:
     - netcdf4 >=1.7.2
     - numpy >=2.1.0
     - tqdm >=4.50.0
-    - xarray >=2024.5.0
+    - xarray >=2025.8.0
     - cf_xarray >=0.8.6
     - xgcm >=0.9.0
     - zarr >=2.15.0,!=2.18.0,<3
diff --git a/pixi.toml b/pixi.toml
index 4f5b3d5e05..64edb4e7f3 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -51,10 +51,10 @@ python = "3.11.*"
 netcdf4 = "1.6.*"
 numpy = "2.1.*"
 tqdm = "4.50.*"
-xarray = "2024.5.*"
+xarray = "2025.8.*"
 uxarray = "2025.3.*"
-dask = "2024.5.*"
-zarr = "2.15.*"
+dask = "2024.6.*"
+zarr = "2.18.*"
 xgcm = { version = "0.9.*", channel = "conda-forge" }
 cf_xarray = "0.8.*"
 cftime = "1.6.*"

From 232b240e1baf2e8f39f52d0bfbf8600dbeb7db6b Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 17:30:41 +0200
Subject: [PATCH 40/42] Fix nemo ingestion

---
 docs/user_guide/examples/tutorial_nemo.ipynb | 31 ++++++++------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/docs/user_guide/examples/tutorial_nemo.ipynb b/docs/user_guide/examples/tutorial_nemo.ipynb
index 9d31953d14..61fdbac563 100644
--- a/docs/user_guide/examples/tutorial_nemo.ipynb
+++ b/docs/user_guide/examples/tutorial_nemo.ipynb
@@ -67,15 +67,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds_fields = xr.merge(\n",
-    "    [\n",
-    "        parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/U\"),\n",
-    "        parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/V\"),\n",
-    "    ]\n",
-    ")  # TODO: Fix grid staggering (using SGRID metadata)\n",
+    "ds_u = parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/U\")\n",
+    "ds_v = parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/V\")\n",
     "ds_coords = parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/mesh_mask\")\n",
     "ds_fset = parcels.convert.nemo_to_sgrid(\n",
-    "    fields=dict(U=ds_fields[\"U\"], V=ds_fields[\"V\"]), coords=ds_coords\n",
+    "    fields=dict(U=ds_u[\"U\"], V=ds_v[\"V\"]), coords=ds_coords\n",
     ")\n",
     "\n",
     "fieldset = parcels.FieldSet.from_sgrid_conventions(ds_fset)"
@@ -290,16 +286,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds_fields = xr.merge(\n",
-    "    [\n",
-    "        parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/U\"),\n",
-    "        parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/V\"),\n",
-    "        parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/W\"),\n",
-    "    ]\n",
-    ")  # TODO: Fix grid staggering (using SGRID metadata)\n",
-    "ds_coords = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/mesh_mask\")\n",
+    "ds_u = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/U\")\n",
+    "ds_v = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/V\")\n",
+    "ds_w = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/W\")\n",
+    "ds_coords = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/mesh_mask\")[\n",
+    "    [\"glamf\", \"gphif\"]\n",
+    "]\n",
+    "\n",
     "ds_fset = parcels.convert.nemo_to_sgrid(\n",
-    "    fields={\"U\": ds_fields[\"uo\"], \"V\": ds_fields[\"vo\"], \"W\": ds_fields[\"wo\"]},\n",
+    "    fields={\"U\": ds_u[\"uo\"], \"V\": ds_v[\"vo\"], \"W\": ds_w[\"wo\"]},\n",
     "    coords=ds_coords,\n",
     ")\n",
     "fieldset = parcels.FieldSet.from_sgrid_conventions(ds_fset)"
@@ -368,7 +363,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "docs",
+   "display_name": "default",
    "language": "python",
    "name": "python3"
   },
@@ -382,7 +377,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.14.2"
+   "version": "3.14.3"
   }
  },
  "nbformat": 4,

From 1b285797e22b064a4a6da77b694c3665f9348ceb Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 17:49:51 +0200
Subject: [PATCH 41/42] Fix cf_xarray dep in minimum env

---
 pixi.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pixi.toml b/pixi.toml
index 64edb4e7f3..22e141453d 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -56,7 +56,7 @@ uxarray = "2025.3.*"
 dask = "2024.6.*"
 zarr = "2.18.*"
 xgcm = { version = "0.9.*", channel = "conda-forge" }
-cf_xarray = "0.8.*"
+cf_xarray = "0.10.*"
 cftime = "1.6.*"
 pooch = "1.8.*"
 

From 7b4006e8f76472a428c4251be668b4720657951c Mon Sep 17 00:00:00 2001
From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com>
Date: Mon, 13 Apr 2026 17:53:38 +0200
Subject: [PATCH 42/42] Fix open_remote_dataset

Previously the `purpose` argument was not respected: the lookup only failed
on a KeyError, so any registered name was accepted regardless of purpose.
---
 src/parcels/_datasets/remote.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py
index 14c30cd112..e37c6a7723 100644
--- a/src/parcels/_datasets/remote.py
+++ b/src/parcels/_datasets/remote.py
@@ -266,11 +266,10 @@ def open_remote_dataset(name: str, purpose: _TPurpose | Literal["any"] = "any"):
     xarray.Dataset
         The requested dataset.
     """
-    try:
-        dataset_config = _DATASET_KEYS_AND_CONFIGS[name][0]
-    except KeyError as e:
+    if name not in list_remote_datasets(purpose=purpose):
         raise ValueError(
             f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_remote_datasets(purpose=purpose))
-        ) from e
+        )
 
+    dataset_config = _DATASET_KEYS_AND_CONFIGS[name][0]
     return dataset_config.open_dataset()