diff --git a/.github/ci/recipe.yaml b/.github/ci/recipe.yaml index 7416a211d8..fb77725e40 100644 --- a/.github/ci/recipe.yaml +++ b/.github/ci/recipe.yaml @@ -35,7 +35,7 @@ requirements: - netcdf4 >=1.7.2 - numpy >=2.1.0 - tqdm >=4.50.0 - - xarray >=2024.5.0 + - xarray >=2025.8.0 - cf_xarray >=0.8.6 - xgcm >=0.9.0 - zarr >=2.15.0,!=2.18.0,<3 diff --git a/docs/getting_started/tutorial_output.ipynb b/docs/getting_started/tutorial_output.ipynb index 485c3c0800..c3dbba8521 100644 --- a/docs/getting_started/tutorial_output.ipynb +++ b/docs/getting_started/tutorial_output.ipynb @@ -35,7 +35,8 @@ "import numpy as np\n", "import xarray as xr\n", "\n", - "import parcels" + "import parcels\n", + "import parcels.tutorial" ] }, { @@ -52,11 +53,9 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", - "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", "ds_fields.load() # load the dataset into memory\n", "\n", "# Convert to SGRID-compliant dataset and create FieldSet\n", @@ -558,7 +557,7 @@ "metadata": { "celltoolbar": "Metagegevens bewerken", "kernelspec": { - "display_name": "test-notebooks", + "display_name": "default", "language": "python", "name": "python3" }, @@ -572,7 +571,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.14.2" + "version": "3.14.3" } }, "nbformat": 4, diff --git a/docs/getting_started/tutorial_quickstart.md b/docs/getting_started/tutorial_quickstart.md index 60d5ced099..cf17e4bb40 100644 --- a/docs/getting_started/tutorial_quickstart.md +++ b/docs/getting_started/tutorial_quickstart.md @@ -20,6 +20,7 @@ and writing output files that can be read with xarray. import numpy as np import xarray as xr import parcels +import parcels.tutorial ``` ## Input flow fields: `FieldSet` @@ -29,11 +30,8 @@ hydrodynamics fields in which the particles are tracked. Here we provide an example of the [Global Ocean Physics Reanalysis](https://doi.org/10.48670/moi-00021) from the Copernicus Marine Service.
```{code-cell} -example_dataset_folder = parcels.download_example_dataset( - "CopernicusMarine_data_for_Argo_tutorial" -) +ds_fields = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/data") -ds_fields = xr.open_mfdataset(f"{example_dataset_folder}/*.nc", combine="by_coords") ds_fields.load() # load the dataset into memory ds_fields ``` diff --git a/docs/user_guide/examples/explanation_kernelloop.md b/docs/user_guide/examples/explanation_kernelloop.md index bb2d20743b..c4a9a58d12 100644 --- a/docs/user_guide/examples/explanation_kernelloop.md +++ b/docs/user_guide/examples/explanation_kernelloop.md @@ -53,13 +53,10 @@ import numpy as np import xarray as xr import parcels +import parcels.tutorial # Load the CopernicusMarine data in the Agulhas region from the example_datasets -example_dataset_folder = parcels.download_example_dataset( - "CopernicusMarine_data_for_Argo_tutorial" -) - -ds_fields = xr.open_mfdataset(f"{example_dataset_folder}/*.nc", combine="by_coords") +ds_fields = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/data") ds_fields.load() # load the dataset into memory # Create an idealised wind field and add it to the dataset diff --git a/docs/user_guide/examples/tutorial_Argofloats.ipynb b/docs/user_guide/examples/tutorial_Argofloats.ipynb index 0a37193ce7..a96e7bcb6d 100644 --- a/docs/user_guide/examples/tutorial_Argofloats.ipynb +++ b/docs/user_guide/examples/tutorial_Argofloats.ipynb @@ -110,14 +110,13 @@ "import xarray as xr\n", "\n", "import parcels\n", + "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", - "\n", "# TODO check how we can get good performance without loading full dataset in memory\n", "ds_fields.load() # load the dataset into memory\n", "\n", diff --git a/docs/user_guide/examples/tutorial_croco_3D.ipynb b/docs/user_guide/examples/tutorial_croco_3D.ipynb index a84b0cc742..7ea1424398 100644 --- a/docs/user_guide/examples/tutorial_croco_3D.ipynb +++ b/docs/user_guide/examples/tutorial_croco_3D.ipynb @@ -34,16 +34,7 @@ "metadata": {}, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import xarray as xr\n", - "\n", - "import parcels\n", - "\n", - "data_folder = parcels.download_example_dataset(\"CROCOidealized_data\")\n", - "ds_fields = xr.open_dataset(data_folder / \"CROCO_idealized.nc\")\n", - "\n", - "ds_fields.load(); # Preload data to speed up access" + "import matplotlib.pyplot as plt\nimport numpy as np\nimport xarray as xr\n\nimport parcels\nimport parcels.tutorial\n\nds_fields = parcels.tutorial.open_dataset(\"CROCOidealized_data/data\")\n\nds_fields.load(); # Preload data to speed up access" ] }, { diff --git a/docs/user_guide/examples/tutorial_delaystart.ipynb b/docs/user_guide/examples/tutorial_delaystart.ipynb index 03bdb6a589..8bb3ffd956 100644 --- a/docs/user_guide/examples/tutorial_delaystart.ipynb +++ b/docs/user_guide/examples/tutorial_delaystart.ipynb @@ -30,6 +30,7 @@ "from matplotlib.animation import FuncAnimation\n", "\n", "import parcels\n", + "import parcels.tutorial\n", "\n", "# for interactive display of animations\n", 
"plt.rcParams[\"animation.html\"] = \"jshtml\"" @@ -50,11 +51,9 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", - "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", "ds_fields.load() # load the dataset into memory\n", "\n", "# Convert to SGRID-compliant dataset and create FieldSet\n", diff --git a/docs/user_guide/examples/tutorial_diffusion.ipynb b/docs/user_guide/examples/tutorial_diffusion.ipynb index 6e42b4ce91..5010f84067 100644 --- a/docs/user_guide/examples/tutorial_diffusion.ipynb +++ b/docs/user_guide/examples/tutorial_diffusion.ipynb @@ -119,7 +119,8 @@ "import trajan as ta\n", "import xarray as xr\n", "\n", - "import parcels" + "import parcels\n", + "import parcels.tutorial" ] }, { @@ -467,12 +468,8 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", - ")\n", - "\n", - "ds_fields = xr.open_mfdataset(\n", - " f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\"\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ").isel(depth=slice(0, 1))\n", "ds_fields.load() # load the dataset into memory" ] diff --git a/docs/user_guide/examples/tutorial_dt_integrators.ipynb b/docs/user_guide/examples/tutorial_dt_integrators.ipynb index 2ecf7f81af..bd4d93de54 100644 --- a/docs/user_guide/examples/tutorial_dt_integrators.ipynb +++ b/docs/user_guide/examples/tutorial_dt_integrators.ipynb @@ -59,13 +59,12 @@ "import xarray as xr\n", "\n", "import parcels\n", + "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", - "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", "ds_fields.load() # load the dataset into memory\n", "\n", "# Convert to SGRID-compliant dataset and create FieldSet\n", diff --git a/docs/user_guide/examples/tutorial_gsw_density.ipynb b/docs/user_guide/examples/tutorial_gsw_density.ipynb index 44b5928a7a..fe089bbe13 100644 --- a/docs/user_guide/examples/tutorial_gsw_density.ipynb +++ b/docs/user_guide/examples/tutorial_gsw_density.ipynb @@ -36,14 +36,13 @@ "import xarray as xr\n", "\n", "import parcels\n", + "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", - "\n", "# TODO check how we can get good performance without loading full dataset in memory\n", "ds_fields.load() # load the dataset into memory\n", "\n", diff --git 
a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb index f5dc55571b..4196ba4a5f 100644 --- a/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb +++ b/docs/user_guide/examples/tutorial_manipulating_field_data.ipynb @@ -43,13 +43,12 @@ "import xarray as xr\n", "\n", "import parcels\n", + "import parcels.tutorial\n", "\n", "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", - "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", "ds_fields.load() # load the dataset into memory\n", "\n", "# Create an idealised wind field and add it to the dataset\n", diff --git a/docs/user_guide/examples/tutorial_mitgcm.ipynb b/docs/user_guide/examples/tutorial_mitgcm.ipynb index e94a0ea885..ae46c7fce2 100644 --- a/docs/user_guide/examples/tutorial_mitgcm.ipynb +++ b/docs/user_guide/examples/tutorial_mitgcm.ipynb @@ -28,9 +28,11 @@ "import xarray as xr\n", "\n", "import parcels\n", + "import parcels.tutorial\n", "\n", - "data_folder = parcels.download_example_dataset(\"MITgcm_example_data\")\n", - "ds_fields = xr.open_dataset(data_folder / \"mitgcm_UV_surface_zonally_reentrant.nc\")" + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant\"\n", + ")" ] }, { diff --git a/docs/user_guide/examples/tutorial_nemo.ipynb b/docs/user_guide/examples/tutorial_nemo.ipynb index fd2f50ab6c..61fdbac563 100644 --- a/docs/user_guide/examples/tutorial_nemo.ipynb +++ b/docs/user_guide/examples/tutorial_nemo.ipynb @@ -50,7 +50,8 @@ "import numpy as np\n", "import xarray as xr\n", "\n", - "import parcels" + "import parcels\n", + "import parcels.tutorial" ] }, { @@ -66,17 +67,11 @@ "metadata": {}, "outputs": [], "source": [ - "data_folder = parcels.download_example_dataset(\"NemoCurvilinear_data\")\n", - "ds_fields = xr.open_mfdataset(\n", - " data_folder.glob(\"*.nc4\"),\n", - " data_vars=\"minimal\",\n", - " coords=\"minimal\",\n", - " compat=\"override\",\n", - ")\n", - "\n", - "ds_coords = xr.open_dataset(data_folder / \"mesh_mask.nc4\", decode_times=False)\n", + "ds_u = parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/U\")\n", + "ds_v = parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/V\")\n", + "ds_coords = parcels.tutorial.open_dataset(\"NemoCurvilinear_data_zonal/mesh_mask\")\n", "ds_fset = parcels.convert.nemo_to_sgrid(\n", - " fields=dict(U=ds_fields[\"U\"], V=ds_fields[\"V\"]), coords=ds_coords\n", + " fields=dict(U=ds_u[\"U\"], V=ds_v[\"V\"]), coords=ds_coords\n", ")\n", "\n", "fieldset = parcels.FieldSet.from_sgrid_conventions(ds_fset)" @@ -291,16 +286,15 @@ "metadata": {}, "outputs": [], "source": [ - "data_folder = parcels.download_example_dataset(\"NemoNorthSeaORCA025-N006_data\")\n", - "ds_fields = xr.open_mfdataset(\n", - " data_folder.glob(\"ORCA*.nc\"),\n", - " data_vars=\"minimal\",\n", - " coords=\"minimal\",\n", - " compat=\"override\",\n", - ")\n", - "ds_coords = xr.open_dataset(data_folder / \"coordinates.nc\", decode_times=False)\n", + "ds_u = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/U\")\n", + "ds_v = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/V\")\n", + "ds_w = 
parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/W\")\n", + "ds_coords = parcels.tutorial.open_dataset(\"NemoNorthSeaORCA025-N006_data/mesh_mask\")[\n", + " [\"glamf\", \"gphif\"]\n", + "]\n", + "\n", "ds_fset = parcels.convert.nemo_to_sgrid(\n", - " fields={\"U\": ds_fields[\"uo\"], \"V\": ds_fields[\"vo\"], \"W\": ds_fields[\"wo\"]},\n", + " fields={\"U\": ds_u[\"uo\"], \"V\": ds_v[\"vo\"], \"W\": ds_w[\"wo\"]},\n", " coords=ds_coords,\n", ")\n", "fieldset = parcels.FieldSet.from_sgrid_conventions(ds_fset)" @@ -369,7 +363,7 @@ ], "metadata": { "kernelspec": { - "display_name": "docs", + "display_name": "default", "language": "python", "name": "python3" }, @@ -383,7 +377,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.14.2" + "version": "3.14.3" } }, "nbformat": 4, diff --git a/docs/user_guide/examples/tutorial_sampling.ipynb b/docs/user_guide/examples/tutorial_sampling.ipynb index 27ff5af9de..87855debe5 100644 --- a/docs/user_guide/examples/tutorial_sampling.ipynb +++ b/docs/user_guide/examples/tutorial_sampling.ipynb @@ -41,7 +41,8 @@ "# To open and look at the temperature data\n", "import xarray as xr\n", "\n", - "import parcels" + "import parcels\n", + "import parcels.tutorial" ] }, { @@ -59,11 +60,9 @@ "outputs": [], "source": [ "# Load the CopernicusMarine data in the Agulhas region from the example_datasets\n", - "example_dataset_folder = parcels.download_example_dataset(\n", - " \"CopernicusMarine_data_for_Argo_tutorial\"\n", + "ds_fields = parcels.tutorial.open_dataset(\n", + " \"CopernicusMarine_data_for_Argo_tutorial/data\"\n", ")\n", - "\n", - "ds_fields = xr.open_mfdataset(f\"{example_dataset_folder}/*.nc\", combine=\"by_coords\")\n", "ds_fields.load() # load the dataset into memory\n", "\n", "# Convert to SGRID-compliant dataset and create FieldSet\n", diff --git a/pixi.toml b/pixi.toml index 4f5b3d5e05..22e141453d 100644 --- a/pixi.toml +++ b/pixi.toml @@ -51,12 +51,12 @@ python = "3.11.*" netcdf4 = "1.6.*" numpy = "2.1.*" tqdm = "4.50.*" -xarray = "2024.5.*" +xarray = "2025.8.*" uxarray = "2025.3.*" -dask = "2024.5.*" -zarr = "2.15.*" +dask = "2024.6.*" +zarr = "2.18.*" xgcm = { version = "0.9.*", channel = "conda-forge" } -cf_xarray = "0.8.*" +cf_xarray = "0.10.*" cftime = "1.6.*" pooch = "1.8.*" diff --git a/src/parcels/__init__.py b/src/parcels/__init__.py index c13850a333..2a7854cded 100644 --- a/src/parcels/__init__.py +++ b/src/parcels/__init__.py @@ -38,7 +38,6 @@ ParticleSetWarning, ) from parcels._logger import logger -from parcels._tutorial import download_example_dataset, list_example_datasets __all__ = [ # noqa: RUF022 # Core classes @@ -68,8 +67,6 @@ "ParticleSetWarning", # Utilities "logger", - "download_example_dataset", - "list_example_datasets", ] _stdlib_warnings.warn( diff --git a/src/parcels/_datasets/remote.py b/src/parcels/_datasets/remote.py new file mode 100644 index 0000000000..e37c6a7723 --- /dev/null +++ b/src/parcels/_datasets/remote.py @@ -0,0 +1,275 @@ +import abc +import enum +import os +from collections.abc import Callable +from datetime import datetime, timedelta +from pathlib import Path +from typing import Literal + +import pooch +import xarray as xr + +from parcels._v3to4 import patch_dataset_v4_compat + +# When modifying existing datasets in a backwards incompatible way, +# make a new release in the repo and update the DATA_REPO_TAG to the new tag +_DATA_REPO_TAG = "main" + +_DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{_DATA_REPO_TAG}" + +_DATA_HOME = 
os.environ.get("PARCELS_EXAMPLE_DATA") +if _DATA_HOME is None: + _DATA_HOME = pooch.os_cache("parcels") + +# See instructions at https://github.com/Parcels-code/parcels-data for adding new datasets +_POOCH_REGISTRY_FILES: list[str] = ( + # These datasets are from v3 and before of Parcels, where we just used netcdf files + [ + "data/MovingEddies_data/moving_eddiesP.nc", + "data/MovingEddies_data/moving_eddiesU.nc", + "data/MovingEddies_data/moving_eddiesV.nc", + ] + + ["data/MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"] + + ["data/OFAM_example_data/OFAM_simple_U.nc", "data/OFAM_example_data/OFAM_simple_V.nc"] + + [ + "data/Peninsula_data/peninsulaU.nc", + "data/Peninsula_data/peninsulaV.nc", + "data/Peninsula_data/peninsulaP.nc", + "data/Peninsula_data/peninsulaT.nc", + ] + + [ + f"data/GlobCurrent_example_data/{date.strftime('%Y%m%d')}000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc" + for date in ([datetime(2002, 1, 1) + timedelta(days=x) for x in range(0, 365)] + [datetime(2003, 1, 1)]) + ] + + [ + "data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", + "data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", + "data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", + ] + + [ + "data/DecayingMovingEddy_data/decaying_moving_eddyU.nc", + "data/DecayingMovingEddy_data/decaying_moving_eddyV.nc", + ] + + [ + "data/FESOM_periodic_channel/fesom_channel.nc", + "data/FESOM_periodic_channel/u.fesom_channel.nc", + "data/FESOM_periodic_channel/v.fesom_channel.nc", + "data/FESOM_periodic_channel/w.fesom_channel.nc", + ] + + [ + "data/NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4", + "data/NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4", + "data/NemoCurvilinear_data/mesh_mask.nc4", + ] + + [ + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05U.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05U.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05U.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05U.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05U.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05U.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05V.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05V.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05V.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05V.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05V.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05V.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000104d05W.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000109d05W.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000114d05W.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000119d05W.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000124d05W.nc", + "data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_20000129d05W.nc", + "data/NemoNorthSeaORCA025-N006_data/coordinates.nc", + ] + + [ + "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169000.nc", + "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169001.nc", + "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169002.nc", + 
"data/POPSouthernOcean_data/t.x1_SAMOC_flux.169003.nc", + "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169004.nc", + "data/POPSouthernOcean_data/t.x1_SAMOC_flux.169005.nc", + ] + + [ + "data/SWASH_data/field_0065532.nc", + "data/SWASH_data/field_0065537.nc", + "data/SWASH_data/field_0065542.nc", + "data/SWASH_data/field_0065548.nc", + "data/SWASH_data/field_0065552.nc", + "data/SWASH_data/field_0065557.nc", + ] + + [f"data/WOA_data/woa18_decav_t{m:02d}_04.nc" for m in range(1, 13)] + + ["data/CROCOidealized_data/CROCO_idealized.nc"] + # These datasets are from v4 of Parcels where we're opting for Zipped zarr datasets + # ... +) + +_POOCH_REGISTRY = {k: None for k in _POOCH_REGISTRY_FILES} + + +_ODIE = pooch.create( + path=_DATA_HOME, + base_url=_DATA_URL, + registry=_POOCH_REGISTRY, +) + + +class _ParcelsDataset(abc.ABC): + @abc.abstractmethod + def open_dataset(self) -> xr.Dataset: ... + + +class _V3Dataset(_ParcelsDataset): + def __init__(self, path_relative_to_root: str, pre_decode_cf_callable=None): + self.path_relative_to_root = path_relative_to_root # glob is allowed + + # Function to apply to the dataset before the decoding the CF variables + self.pup = _ODIE + self.pre_decode_cf_callable: None | Callable[[xr.Dataset], xr.Dataset] = pre_decode_cf_callable + self.v3_dataset_name = path_relative_to_root.split("/")[0] + + def open_dataset(self) -> xr.Dataset: + self.download_relevant_files() + with xr.set_options(use_new_combine_kwarg_defaults=True): + ds = xr.open_mfdataset( + f"{self.pup.path}/{self.path_relative_to_root}", + decode_cf=False, + # options to open mfdataset https://github.com/Parcels-code/Parcels/pull/2574#discussion_r3073256988 + data_vars="minimal", + coords="minimal", + compat="override", + ) + + if self.pre_decode_cf_callable is not None: + ds = self.pre_decode_cf_callable(ds) + + ds = xr.decode_cf(ds) + return ds + + def download_relevant_files(self) -> None: + for file in self.pup.registry: + if self.v3_dataset_name in file: + self.pup.fetch(file) + return + + +class _ZarrZipDataset(_ParcelsDataset): + def __init__(self, path_relative_to_root): + self.pup = _ODIE + self.path_relative_to_root = path_relative_to_root + + def open_dataset(self) -> xr.Dataset: + self.pup.fetch(self.path_relative_to_root) + return xr.open_zarr(Path(self.pup.path) / self.path_relative_to_root) + + +def _preprocess_drop_time_from_mesh1(ds: xr.Dataset) -> xr.Dataset: + # For some reason on the mesh "NemoNorthSeaORCA025-N006_data/coordinates.nc" there are two time dimensions (of length 1). These dimension also has broken cf-time metadata + # this fixes that + return ds.isel(time=0).drop(["time", "time_steps"]) + + +def _preprocess_drop_time_from_mesh2(ds: xr.Dataset) -> xr.Dataset: + # For some reason on the mesh "NemoCurvilinear_data_zonal/mesh_mask" there is a time dimension. + return ds.isel(time=0).drop(["time"]) + + +def _preprocess_set_cf_calendar_360_day(ds: xr.Dataset) -> xr.Dataset: + # For some reason "WOA_data/woa18_decav_t*_04.nc" looks to be simulation data using CF time (i.e., months of 30 days), however the calendar attribute isn't set. 
+ ds.time.attrs.update({"calendar": "360_day"}) + return ds + + +class _Purpose(enum.Enum): + TESTING = "testing" + TUTORIAL = "tutorial" + + +_TPurpose = Literal["testing", "tutorial"] + +# The first here is a human readable key used to open datasets, with an object to open the datasets +# fmt: off +_DATASET_KEYS_AND_CONFIGS: dict[str, tuple[_V3Dataset, _Purpose]] = dict([ + ("MovingEddies_data/P", (_V3Dataset("data/MovingEddies_data/moving_eddiesP.nc"), _Purpose.TUTORIAL)), + ("MovingEddies_data/U", (_V3Dataset("data/MovingEddies_data/moving_eddiesU.nc"), _Purpose.TUTORIAL)), + ("MovingEddies_data/V", (_V3Dataset("data/MovingEddies_data/moving_eddiesV.nc"), _Purpose.TUTORIAL)), + ("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant", (_V3Dataset("data/MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant.nc"), _Purpose.TUTORIAL)), + ("OFAM_example_data/U", (_V3Dataset("data/OFAM_example_data/OFAM_simple_U.nc"), _Purpose.TUTORIAL)), + ("OFAM_example_data/V", (_V3Dataset("data/OFAM_example_data/OFAM_simple_V.nc"), _Purpose.TUTORIAL)), + ("Peninsula_data/U", (_V3Dataset("data/Peninsula_data/peninsulaU.nc"), _Purpose.TUTORIAL)), + ("Peninsula_data/V", (_V3Dataset("data/Peninsula_data/peninsulaV.nc"), _Purpose.TUTORIAL)), + ("Peninsula_data/P", (_V3Dataset("data/Peninsula_data/peninsulaP.nc"), _Purpose.TUTORIAL)), + ("Peninsula_data/T", (_V3Dataset("data/Peninsula_data/peninsulaT.nc"), _Purpose.TUTORIAL)), + ("GlobCurrent_example_data/data", (_V3Dataset("data/GlobCurrent_example_data/*000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc", pre_decode_cf_callable=patch_dataset_v4_compat), _Purpose.TUTORIAL)), + ("CopernicusMarine_data_for_Argo_tutorial/data", (_V3Dataset("data/CopernicusMarine_data_for_Argo_tutorial/cmems_mod_glo_phy-*.nc"), _Purpose.TUTORIAL)), + ("DecayingMovingEddy_data/U", (_V3Dataset("data/DecayingMovingEddy_data/decaying_moving_eddyU.nc"), _Purpose.TUTORIAL)), + ("DecayingMovingEddy_data/V", (_V3Dataset("data/DecayingMovingEddy_data/decaying_moving_eddyV.nc"), _Purpose.TUTORIAL)), + ("FESOM_periodic_channel/fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/fesom_channel.nc"), _Purpose.TUTORIAL)), + ("FESOM_periodic_channel/u.fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/u.fesom_channel.nc"), _Purpose.TUTORIAL)), + ("FESOM_periodic_channel/v.fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/v.fesom_channel.nc"), _Purpose.TUTORIAL)), + ("FESOM_periodic_channel/w.fesom_channel", (_V3Dataset("data/FESOM_periodic_channel/w.fesom_channel.nc"), _Purpose.TUTORIAL)), + ("NemoCurvilinear_data_zonal/U", (_V3Dataset("data/NemoCurvilinear_data/U_purely_zonal-ORCA025_grid_U.nc4"), _Purpose.TUTORIAL)), + ("NemoCurvilinear_data_zonal/V", (_V3Dataset("data/NemoCurvilinear_data/V_purely_zonal-ORCA025_grid_V.nc4"), _Purpose.TUTORIAL)), + ("NemoCurvilinear_data_zonal/mesh_mask", (_V3Dataset("data/NemoCurvilinear_data/mesh_mask.nc4", _preprocess_drop_time_from_mesh2), _Purpose.TUTORIAL)), + ("NemoNorthSeaORCA025-N006_data/U", (_V3Dataset("data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05U.nc"), _Purpose.TUTORIAL)), + ("NemoNorthSeaORCA025-N006_data/V", (_V3Dataset("data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05V.nc"), _Purpose.TUTORIAL)), + ("NemoNorthSeaORCA025-N006_data/W", (_V3Dataset("data/NemoNorthSeaORCA025-N006_data/ORCA025-N06_200001*05W.nc"), _Purpose.TUTORIAL)), + ("NemoNorthSeaORCA025-N006_data/mesh_mask", (_V3Dataset("data/NemoNorthSeaORCA025-N006_data/coordinates.nc", _preprocess_drop_time_from_mesh1), _Purpose.TUTORIAL)), + # 
"POPSouthernOcean_data/t.x1_SAMOC_flux.16900*.nc", # TODO v4: In v3 but should not be in v4 https://github.com/Parcels-code/Parcels/issues/2571#issuecomment-4214476973 + ("SWASH_data/data", (_V3Dataset("data/SWASH_data/field_00655*.nc"), _Purpose.TUTORIAL)), + ("WOA_data/data", (_V3Dataset("data/WOA_data/woa18_decav_t*_04.nc", _preprocess_set_cf_calendar_360_day), _Purpose.TUTORIAL)), + ("CROCOidealized_data/data", (_V3Dataset("data/CROCOidealized_data/CROCO_idealized.nc"), _Purpose.TUTORIAL)), +]) +# fmt: on + + +def list_remote_datasets(purpose: _TPurpose | Literal["any"] = "any") -> list[str]: + """List the available remote datasets. + + Use :func:`open_dataset` to download and open one of the datasets. + + Parameters + ---------- + purpose : {'any', 'testing', 'tutorial'}, optional + Filter datasets by purpose. Use ``'any'`` (default) to return all + datasets, ``'tutorial'`` for tutorial datasets, or ``'testing'`` for + datasets used in tests. + + Returns + ------- + datasets : list of str + The names of the available datasets matching the given purpose. + """ + if purpose == "any": + return list(_DATASET_KEYS_AND_CONFIGS.keys()) + + purpose_enum = _Purpose(purpose) + return [k for (k, (_, p)) in _DATASET_KEYS_AND_CONFIGS.items() if p == purpose_enum] + + +def open_remote_dataset(name: str, purpose: _TPurpose | Literal["any"] = "any"): + """Download and open a remote dataset as an :class:`xarray.Dataset`. + + Use :func:`list_datasets` to see the available dataset names. + + Parameters + ---------- + name : str + Name of the dataset to open. Must be one of the keys returned by + :func:`list_datasets`. + purpose : {'any', 'testing', 'tutorial'}, optional + Purpose filter used to populate the error message when ``name`` is not + found. Defaults to ``'any'``. + + Returns + ------- + xarray.Dataset + The requested dataset. + """ + if name not in list_remote_datasets(purpose=purpose): + raise ValueError( + f"Dataset {name!r} not found. Available datasets are: " + ", ".join(list_remote_datasets(purpose=purpose)) + ) + + dataset_config = _DATASET_KEYS_AND_CONFIGS[name][0] + return dataset_config.open_dataset() diff --git a/src/parcels/_tutorial.py b/src/parcels/_tutorial.py deleted file mode 100644 index 34409312cc..0000000000 --- a/src/parcels/_tutorial.py +++ /dev/null @@ -1,198 +0,0 @@ -import os -from datetime import datetime, timedelta -from pathlib import Path - -import pooch -import xarray as xr - -from parcels._v3to4 import patch_dataset_v4_compat - -__all__ = ["download_example_dataset", "list_example_datasets"] - -# When modifying existing datasets in a backwards incompatible way, -# make a new release in the repo and update the DATA_REPO_TAG to the new tag -DATA_REPO_TAG = "main" - -DATA_URL = f"https://github.com/Parcels-code/parcels-data/raw/{DATA_REPO_TAG}/data" - -# Keys are the dataset names. Values are the filenames in the dataset folder. Note that -# you can specify subfolders in the dataset folder putting slashes in the filename list. 
-# e.g., -# "my_dataset": ["file0.nc", "folder1/file1.nc", "folder2/file2.nc"] -# my_dataset/ -# ├── file0.nc -# ├── folder1/ -# │ └── file1.nc -# └── folder2/ -# └── file2.nc -# -# See instructions at https://github.com/Parcels-code/parcels-data for adding new datasets -EXAMPLE_DATA_FILES: dict[str, list[str]] = { - "MovingEddies_data": [ - "moving_eddiesP.nc", - "moving_eddiesU.nc", - "moving_eddiesV.nc", - ], - "MITgcm_example_data": ["mitgcm_UV_surface_zonally_reentrant.nc"], - "OFAM_example_data": ["OFAM_simple_U.nc", "OFAM_simple_V.nc"], - "Peninsula_data": [ - "peninsulaU.nc", - "peninsulaV.nc", - "peninsulaP.nc", - "peninsulaT.nc", - ], - "GlobCurrent_example_data": [ - f"{date.strftime('%Y%m%d')}000000-GLOBCURRENT-L4-CUReul_hs-ALT_SUM-v02.0-fv01.0.nc" - for date in ([datetime(2002, 1, 1) + timedelta(days=x) for x in range(0, 365)] + [datetime(2003, 1, 1)]) - ], - "CopernicusMarine_data_for_Argo_tutorial": [ - "cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m_uo-vo_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", - "cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m_so_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", - "cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m_thetao_31.00E-33.00E_33.00S-30.00S_0.49-2225.08m_2024-01-01-2024-02-01.nc", - ], - "DecayingMovingEddy_data": [ - "decaying_moving_eddyU.nc", - "decaying_moving_eddyV.nc", - ], - "FESOM_periodic_channel": [ - "fesom_channel.nc", - "u.fesom_channel.nc", - "v.fesom_channel.nc", - "w.fesom_channel.nc", - ], - "NemoCurvilinear_data": [ - "U_purely_zonal-ORCA025_grid_U.nc4", - "V_purely_zonal-ORCA025_grid_V.nc4", - "mesh_mask.nc4", - ], - "NemoNorthSeaORCA025-N006_data": [ - "ORCA025-N06_20000104d05U.nc", - "ORCA025-N06_20000109d05U.nc", - "ORCA025-N06_20000114d05U.nc", - "ORCA025-N06_20000119d05U.nc", - "ORCA025-N06_20000124d05U.nc", - "ORCA025-N06_20000129d05U.nc", - "ORCA025-N06_20000104d05V.nc", - "ORCA025-N06_20000109d05V.nc", - "ORCA025-N06_20000114d05V.nc", - "ORCA025-N06_20000119d05V.nc", - "ORCA025-N06_20000124d05V.nc", - "ORCA025-N06_20000129d05V.nc", - "ORCA025-N06_20000104d05W.nc", - "ORCA025-N06_20000109d05W.nc", - "ORCA025-N06_20000114d05W.nc", - "ORCA025-N06_20000119d05W.nc", - "ORCA025-N06_20000124d05W.nc", - "ORCA025-N06_20000129d05W.nc", - "coordinates.nc", - ], - "POPSouthernOcean_data": [ - "t.x1_SAMOC_flux.169000.nc", - "t.x1_SAMOC_flux.169001.nc", - "t.x1_SAMOC_flux.169002.nc", - "t.x1_SAMOC_flux.169003.nc", - "t.x1_SAMOC_flux.169004.nc", - "t.x1_SAMOC_flux.169005.nc", - ], - "SWASH_data": [ - "field_0065532.nc", - "field_0065537.nc", - "field_0065542.nc", - "field_0065548.nc", - "field_0065552.nc", - "field_0065557.nc", - ], - "WOA_data": [f"woa18_decav_t{m:02d}_04.nc" for m in range(1, 13)], - "CROCOidealized_data": ["CROCO_idealized.nc"], -} - - -def _create_pooch_registry() -> dict[str, None]: - """Collapses the mapping of dataset names to filenames into a pooch registry. - - Hashes are set to None for all files. 
- """ - registry: dict[str, None] = {} - for dataset, filenames in EXAMPLE_DATA_FILES.items(): - for filename in filenames: - registry[f"{dataset}/{filename}"] = None - return registry - - -POOCH_REGISTRY = _create_pooch_registry() - - -def _get_pooch(data_home=None): - if data_home is None: - data_home = os.environ.get("PARCELS_EXAMPLE_DATA") - if data_home is None: - data_home = pooch.os_cache("parcels") - - return pooch.create( - path=data_home, - base_url=DATA_URL, - registry=POOCH_REGISTRY, - ) - - -def list_example_datasets() -> list[str]: - """List the available example datasets. - - Use :func:`download_example_dataset` to download one of the datasets. - - Returns - ------- - datasets : list of str - The names of the available example datasets. - """ - return list(EXAMPLE_DATA_FILES.keys()) - - -def download_example_dataset(dataset: str, data_home=None): - """Load an example dataset from the parcels website. - - This function provides quick access to a small number of example datasets - that are useful in documentation and testing in parcels. - - Parameters - ---------- - dataset : str - Name of the dataset to load. - data_home : pathlike, optional - The directory in which to cache data. If not specified, the value - of the ``PARCELS_EXAMPLE_DATA`` environment variable, if any, is used. - Otherwise the default location is assigned by :func:`get_data_home`. - - Returns - ------- - dataset_folder : Path - Path to the folder containing the downloaded dataset files. - """ - # Dev note: `dataset` is assumed to be a folder name with netcdf files - if dataset not in EXAMPLE_DATA_FILES: - raise ValueError( - f"Dataset {dataset!r} not found. Available datasets are: " + ", ".join(EXAMPLE_DATA_FILES.keys()) - ) - odie = _get_pooch(data_home=data_home) - - cache_folder = Path(odie.path) - dataset_folder = cache_folder / dataset - - for file_name in odie.registry: - if file_name.startswith(dataset): - should_patch = dataset == "GlobCurrent_example_data" - odie.fetch(file_name, processor=_v4_compat_patch if should_patch else None) - - return dataset_folder - - -def _v4_compat_patch(fname, action, pup): - """ - Patch the GlobCurrent example dataset to be compatible with v4. - - See https://www.fatiando.org/pooch/latest/processors.html#creating-your-own-processors - """ - if action == "fetch": - return fname - xr.load_dataset(fname).pipe(patch_dataset_v4_compat).to_netcdf(fname) - return fname diff --git a/src/parcels/tutorial.py b/src/parcels/tutorial.py new file mode 100644 index 0000000000..49a4a8b6bf --- /dev/null +++ b/src/parcels/tutorial.py @@ -0,0 +1,36 @@ +from parcels._datasets.remote import list_remote_datasets as _list_remote_datasets +from parcels._datasets.remote import open_remote_dataset as _open_remote_dataset + +__all__ = ["list_datasets", "open_dataset"] + + +def list_datasets() -> list[str]: + """List the available tutorial datasets. + + Use :func:`open_dataset` to download and open one of the datasets. + + Returns + ------- + datasets : list of str + The names of the available datasets matching the given purpose. + """ + return _list_remote_datasets(purpose="tutorial") + + +def open_dataset(name: str): + """Download and open a tutorial dataset as an :class:`xarray.Dataset`. + + Use :func:`list_datasets` to see the available dataset names. + + Parameters + ---------- + name : str + Name of the dataset to open. Must be one of the keys returned by + :func:`list_datasets`. + + Returns + ------- + xarray.Dataset + The requested dataset. 
+ """ + return _open_remote_dataset(name, purpose="tutorial") diff --git a/tests-v3/tools/test_exampledata_utils.py b/tests-v3/tools/test_exampledata_utils.py deleted file mode 100644 index 94ed9cf833..0000000000 --- a/tests-v3/tools/test_exampledata_utils.py +++ /dev/null @@ -1,37 +0,0 @@ -import pytest -import requests - -from parcels.tools.exampledata_utils import ( - _get_pooch, - download_example_dataset, - list_example_datasets, -) - - -@pytest.mark.parametrize("url", [_get_pooch().get_url(filename) for filename in _get_pooch().registry.keys()]) -def test_pooch_registry_url_reponse(url): - response = requests.head(url) - assert not (400 <= response.status_code < 600) - - -@pytest.mark.parametrize("dataset", list_example_datasets()[:1]) -def test_download_example_dataset_folder_creation(tmp_path, dataset): - dataset_folder_path = download_example_dataset(dataset, data_home=tmp_path) - - assert dataset_folder_path.exists() - assert dataset_folder_path.name == dataset - assert dataset_folder_path.parent == tmp_path - - -def test_download_non_existing_example_dataset(tmp_path): - with pytest.raises(ValueError): - download_example_dataset("non_existing_dataset", data_home=tmp_path) - - -def test_download_example_dataset_no_data_home(): - # This test depends on your default data_home location and whether - # it's okay to download files there. Be careful with this test in a CI environment. - dataset = list_example_datasets()[0] - dataset_folder_path = download_example_dataset(dataset) - assert dataset_folder_path.exists() - assert dataset_folder_path.name == dataset diff --git a/tests/datasets/test_remote.py b/tests/datasets/test_remote.py new file mode 100644 index 0000000000..856752d018 --- /dev/null +++ b/tests/datasets/test_remote.py @@ -0,0 +1,42 @@ +import pytest +import requests +import xarray as xr + +import parcels._datasets.remote as remote + + +@pytest.fixture(scope="function", autouse=True) +def tmp_path_parcels_example_data(monkeypatch, tmp_path): + monkeypatch.setenv("PARCELS_EXAMPLE_DATA", str(tmp_path)) + return tmp_path + + +@pytest.mark.parametrize("url", [remote._ODIE.get_url(filename) for filename in remote._ODIE.registry.keys()]) +def test_pooch_registry_url_reponse(url): + response = requests.head(url) + assert not (400 <= response.status_code < 600) + + +def test_open_dataset_non_existing(): + with pytest.raises(ValueError, match="Dataset.*not found"): + remote.open_remote_dataset("non_existing_dataset") + + +@pytest.mark.parametrize("name", remote.list_remote_datasets()) +def test_open_dataset(name): + ds = remote.open_remote_dataset(name) + assert isinstance(ds, xr.Dataset) + + +@pytest.mark.parametrize("name", remote.list_remote_datasets()) +def test_dataset_keys(name): + assert not name.endswith((".zarr", ".zip", ".nc")), "Dataset name should not have suffix" + + +def test_list_datasets(): + tutorial_datasets = set(remote.list_remote_datasets("tutorial")) + testing_datasets = set(remote.list_remote_datasets("testing")) + all_datasets = set(remote.list_remote_datasets("any")) + assert tutorial_datasets.issubset(all_datasets) + assert testing_datasets.issubset(all_datasets) + assert tutorial_datasets | testing_datasets == all_datasets diff --git a/tests/test_advection.py b/tests/test_advection.py index c5d6a9ebf4..05450a4a93 100644 --- a/tests/test_advection.py +++ b/tests/test_advection.py @@ -3,6 +3,7 @@ import xarray as xr import parcels +import parcels.tutorial from parcels import ( Field, FieldSet, @@ -455,10 +456,9 @@ def UpdateP(particles, fieldset): # 
pragma: no cover def test_nemo_curvilinear_fieldset(): - data_folder = parcels.download_example_dataset("NemoCurvilinear_data") - U = xr.open_mfdataset(data_folder.glob("*U.nc4")) - V = xr.open_mfdataset(data_folder.glob("*V.nc4")) - coords = xr.open_dataset(data_folder / "mesh_mask.nc4") + U = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/U") + V = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/V") + coords = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask") ds = parcels.convert.nemo_to_sgrid(fields=dict(U=U, V=V), coords=coords) @@ -476,11 +476,10 @@ def test_nemo_curvilinear_fieldset(): @pytest.mark.parametrize("kernel", [AdvectionRK4, AdvectionRK4_3D]) def test_nemo_3D_curvilinear_fieldset(kernel): - data_folder = parcels.download_example_dataset("NemoNorthSeaORCA025-N006_data") - U = xr.open_mfdataset(data_folder.glob("*U.nc")) - V = xr.open_mfdataset(data_folder.glob("*V.nc")) - W = xr.open_mfdataset(data_folder.glob("*W.nc")) - coords = xr.open_dataset(data_folder / "coordinates.nc", decode_times=False) + U = parcels.tutorial.open_dataset("NemoNorthSeaORCA025-N006_data/U") + V = parcels.tutorial.open_dataset("NemoNorthSeaORCA025-N006_data/V") + W = parcels.tutorial.open_dataset("NemoNorthSeaORCA025-N006_data/W") + coords = parcels.tutorial.open_dataset("NemoNorthSeaORCA025-N006_data/mesh_mask") ds = parcels.convert.nemo_to_sgrid(fields=dict(U=U["uo"], V=V["vo"], W=W["wo"]), coords=coords) @@ -505,8 +504,7 @@ def test_nemo_3D_curvilinear_fieldset(kernel): def test_mitgcm(): - data_folder = parcels.download_example_dataset("MITgcm_example_data") - ds_fields = xr.open_dataset(data_folder / "mitgcm_UV_surface_zonally_reentrant.nc") + ds_fields = parcels.tutorial.open_dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant") ds_fset = convert.mitgcm_to_sgrid(fields={"U": ds_fields.UVEL, "V": ds_fields.VVEL}, coords=ds_fields) fieldset = FieldSet.from_sgrid_conventions(ds_fset) diff --git a/tests/test_convert.py b/tests/test_convert.py index 15bac3cfbf..b286bb2689 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -3,6 +3,7 @@ import parcels import parcels.convert as convert +import parcels.tutorial from parcels import FieldSet from parcels._core.utils import sgrid from parcels._datasets.structured.circulation_models import datasets as datasets_circulation_models @@ -10,10 +11,9 @@ def test_nemo_to_sgrid(): - data_folder = parcels.download_example_dataset("NemoCurvilinear_data") - U = xr.open_mfdataset(data_folder.glob("*U.nc4")) - V = xr.open_mfdataset(data_folder.glob("*V.nc4")) - coords = xr.open_dataset(data_folder / "mesh_mask.nc4") + U = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/U") + V = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/V") + coords = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask") ds = convert.nemo_to_sgrid(fields=dict(U=U, V=V), coords=coords) @@ -41,10 +41,9 @@ def test_nemo_to_sgrid(): def test_convert_nemo_offsets(): - data_folder = parcels.download_example_dataset("NemoCurvilinear_data") - U = xr.open_mfdataset(data_folder.glob("*U.nc4")) - V = xr.open_mfdataset(data_folder.glob("*V.nc4")) - coords = xr.open_dataset(data_folder / "mesh_mask.nc4") + U = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/U") + V = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/V") + coords = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask") ds = convert.nemo_to_sgrid(fields=dict(U=U, V=V), coords=coords) fieldset = 
FieldSet.from_sgrid_conventions(ds) @@ -56,8 +55,7 @@ def test_convert_nemo_offsets(): def test_convert_mitgcm_offsets(): - data_folder = parcels.download_example_dataset("MITgcm_example_data") - ds_fields = xr.open_dataset(data_folder / "mitgcm_UV_surface_zonally_reentrant.nc") + ds_fields = parcels.tutorial.open_dataset("MITgcm_example_data/mitgcm_UV_surface_zonally_reentrant") coords = ds_fields[["XG", "YG", "Zl", "time"]] ds_fset = convert.mitgcm_to_sgrid(fields={"U": ds_fields.UVEL, "V": ds_fields.VVEL}, coords=coords) fieldset = FieldSet.from_sgrid_conventions(ds_fset) diff --git a/tests/test_index_search.py b/tests/test_index_search.py index 6f20ba1e93..bb7ec3f3b1 100644 --- a/tests/test_index_search.py +++ b/tests/test_index_search.py @@ -1,12 +1,11 @@ import numpy as np import pytest -import xarray as xr import xgcm +import parcels.tutorial from parcels import Field, XGrid from parcels._core.index_search import _search_indices_curvilinear_2d from parcels._datasets.structured.generic import datasets -from parcels._tutorial import download_example_dataset from parcels.interpolators import XLinear @@ -56,13 +55,8 @@ def test_grid_indexing_fpoints(field_cone): def test_indexing_nemo_curvilinear(): - data_folder = download_example_dataset("NemoCurvilinear_data") - ds = xr.open_mfdataset( - data_folder.glob("*.nc4"), combine="nested", data_vars="minimal", coords="minimal", compat="override" - ) - ds = ds.isel({"time_counter": 0, "time": 0, "z_a": 0}, drop=True).rename( - {"glamf": "lon", "gphif": "lat", "z": "depth"} - ) + ds = parcels.tutorial.open_dataset("NemoCurvilinear_data_zonal/mesh_mask") + ds = ds.isel({"z_a": 0}, drop=True).rename({"glamf": "lon", "gphif": "lat", "z": "depth"}) xgcm_grid = xgcm.Grid(ds, coords={"X": {"left": "x"}, "Y": {"left": "y"}}, periodic=False, autoparse_metadata=False) grid = XGrid(xgcm_grid, mesh="spherical") diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index d642a544c7..d5ccb4c1a8 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -8,6 +8,7 @@ import xarray as xr from zarr.storage import MemoryStore +import parcels.tutorial from parcels import ( Field, FieldSet, @@ -18,7 +19,6 @@ Variable, VectorField, XGrid, - download_example_dataset, ) from parcels._core.particle import Particle, create_particle_data, get_default_particle from parcels._core.utils.time import TimeInterval, timedelta_to_float @@ -441,9 +441,8 @@ def test_pset_execute_outputdt_backwards_fieldset_timevarying(): runtime = timedelta(days=2) dt = -timedelta(minutes=5) - # TODO: Not ideal using the `download_example_dataset` here, but I'm struggling to recreate this error using the test suite fieldsets we have - example_dataset_folder = download_example_dataset("CopernicusMarine_data_for_Argo_tutorial") - ds_in = xr.open_mfdataset(f"{example_dataset_folder}/*.nc", combine="by_coords") + # TODO: Not ideal using the `open_dataset` here, but I'm struggling to recreate this error using the test suite fieldsets we have + ds_in = parcels.tutorial.open_dataset("CopernicusMarine_data_for_Argo_tutorial/data") fields = {"U": ds_in["uo"], "V": ds_in["vo"]} ds_fset = copernicusmarine_to_sgrid(fields=fields) fieldset = FieldSet.from_sgrid_conventions(ds_fset) diff --git a/tests/test_sigmagrids.py b/tests/test_sigmagrids.py index de437c8fba..537c8c101d 100644 --- a/tests/test_sigmagrids.py +++ b/tests/test_sigmagrids.py @@ -1,7 +1,7 @@ import numpy as np -import xarray as xr import parcels +import parcels.tutorial from parcels import Particle, 
ParticleSet, Variable from parcels.kernels import AdvectionRK4_3D_CROCO, SampleOmegaCroco, convert_z_to_sigma_croco @@ -17,8 +17,7 @@ def test_conversion_3DCROCO(): lat, lon = ds.y_rho.values[y, x], ds.x_rho.values[y, x] ``` """ - data_folder = parcels.download_example_dataset("CROCOidealized_data") - ds_fields = xr.open_dataset(data_folder / "CROCO_idealized.nc") + ds_fields = parcels.tutorial.open_dataset("CROCOidealized_data/data") fields = { "U": ds_fields["u"], "V": ds_fields["v"], @@ -46,8 +45,7 @@ def test_conversion_3DCROCO(): def test_advection_3DCROCO(): - data_folder = parcels.download_example_dataset("CROCOidealized_data") - ds_fields = xr.open_dataset(data_folder / "CROCO_idealized.nc") + ds_fields = parcels.tutorial.open_dataset("CROCOidealized_data/data") ds_fields.load() fields = { diff --git a/tests/test_uxarray_fieldset.py b/tests/test_uxarray_fieldset.py index 001c411c6c..8393424ee9 100644 --- a/tests/test_uxarray_fieldset.py +++ b/tests/test_uxarray_fieldset.py @@ -1,7 +1,11 @@ +from pathlib import Path + import numpy as np import pytest import uxarray as ux +import parcels._datasets.remote as _parcels_remote +import parcels.tutorial from parcels import ( Field, FieldSet, @@ -9,7 +13,6 @@ ParticleSet, UxGrid, VectorField, - download_example_dataset, ) from parcels._datasets.unstructured.generic import datasets as datasets_unstructured from parcels.convert import fesom_to_ugrid, icon_to_ugrid @@ -22,12 +25,15 @@ @pytest.fixture def ds_fesom_channel() -> ux.UxDataset: - fesom_path = download_example_dataset("FESOM_periodic_channel") - grid_path = f"{fesom_path}/fesom_channel.nc" + # Download FESOM files via the new tutorial API + parcels.tutorial.open_dataset("FESOM_periodic_channel/fesom_channel") + # uxarray requires file paths; access the downloaded files from the pooch cache + _fesom_dir = Path(_parcels_remote._DATA_HOME) / "data" / "FESOM_periodic_channel" + grid_path = str(_fesom_dir / "fesom_channel.nc") data_path = [ - f"{fesom_path}/u.fesom_channel.nc", - f"{fesom_path}/v.fesom_channel.nc", - f"{fesom_path}/w.fesom_channel.nc", + str(_fesom_dir / "u.fesom_channel.nc"), + str(_fesom_dir / "v.fesom_channel.nc"), + str(_fesom_dir / "w.fesom_channel.nc"), ] ds = ux.open_mfdataset(grid_path, data_path).rename_vars({"u": "U", "v": "V", "w": "W"}) ds = fesom_to_ugrid(ds)