From 36a990519129dc8ae1aefb3e91537ba6a0547640 Mon Sep 17 00:00:00 2001 From: Bhavani Ravi Date: Sun, 26 Oct 2025 22:35:23 +0530 Subject: [PATCH 01/17] chore: move standard examples to provider 1. loading example dags as bundles 2. fixing testcases to use bundles instead of dagbag folders 3. fixing testcases to use example_dags from standard module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: André Ahlert --- .../airflow/dag_processing/bundles/manager.py | 20 +++++++++++++++++ .../src/airflow/dag_processing/dagbag.py | 7 ------ .../src/airflow/example_dags/standard | 1 - .../tests/unit/dag_processing/test_dagbag.py | 18 ++++++++------- .../tests/unit/models/test_dagcode.py | 18 +++++++++------ .../tests/unit/models/test_serialized_dag.py | 22 +++++++++---------- .../src/tests_common/test_utils/db.py | 4 ++++ 7 files changed, 56 insertions(+), 34 deletions(-) delete mode 120000 airflow-core/src/airflow/example_dags/standard diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index f2f66cd7d2ad5..f23fad633f3c3 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import annotations +import os import warnings from typing import TYPE_CHECKING @@ -106,6 +107,24 @@ def _add_example_dag_bundle(bundle_config_list: list[_ExternalBundleConfig]): ) +def _add_provider_example_dags_to_bundle(bundle_config_list: list[_ExternalBundleConfig]): + from airflow import providers + + for provider_path in providers.__path__: + for name in os.listdir(provider_path): + example_dag_folder = os.path.join(provider_path, name, "example_dags") + if os.path.isdir(example_dag_folder): + bundle_config_list.append( + _ExternalBundleConfig( + name=f"airflow-provider-{name}-example-dags", + classpath="airflow.dag_processing.bundles.local.LocalDagBundle", + kwargs={ + "path": example_dag_folder, + }, + ) + ) + + def _is_safe_bundle_url(url: str) -> bool: """ Check if a bundle URL is safe to use. @@ -191,6 +210,7 @@ def parse_config(self) -> None: bundle_config_list = _parse_bundle_config(config_list) if conf.getboolean("core", "LOAD_EXAMPLES"): _add_example_dag_bundle(bundle_config_list) + _add_provider_example_dags_to_bundle(bundle_config_list) for bundle_config in bundle_config_list: if bundle_config.team_name and not conf.getboolean("core", "multi_team"): diff --git a/airflow-core/src/airflow/dag_processing/dagbag.py b/airflow-core/src/airflow/dag_processing/dagbag.py index 5062a47bc7ede..cd58d10163596 100644 --- a/airflow-core/src/airflow/dag_processing/dagbag.py +++ b/airflow-core/src/airflow/dag_processing/dagbag.py @@ -477,13 +477,6 @@ def collect_dags( registry = get_importer_registry() files_to_parse = registry.list_dag_files(dag_folder, safe_mode=safe_mode) - if include_examples: - from airflow import example_dags - - example_dag_folder = next(iter(example_dags.__path__)) - - files_to_parse.extend(registry.list_dag_files(example_dag_folder, safe_mode=safe_mode)) - for filepath in files_to_parse: try: file_parse_start_dttm = timezone.utcnow() diff --git a/airflow-core/src/airflow/example_dags/standard 
b/airflow-core/src/airflow/example_dags/standard deleted file mode 120000 index 3c2ef23d52c55..0000000000000 --- a/airflow-core/src/airflow/example_dags/standard +++ /dev/null @@ -1 +0,0 @@ -../../../../providers/standard/src/airflow/providers/standard/example_dags \ No newline at end of file diff --git a/airflow-core/tests/unit/dag_processing/test_dagbag.py b/airflow-core/tests/unit/dag_processing/test_dagbag.py index a673f9f1b0d2a..1951937b720e7 100644 --- a/airflow-core/tests/unit/dag_processing/test_dagbag.py +++ b/airflow-core/tests/unit/dag_processing/test_dagbag.py @@ -28,6 +28,7 @@ import zipfile from copy import deepcopy from datetime import datetime, timedelta, timezone +from pathlib import Path from unittest import mock from unittest.mock import patch @@ -46,6 +47,7 @@ from airflow.models.dag import DagModel from airflow.models.dagwarning import DagWarning, DagWarningType from airflow.models.serialized_dag import SerializedDagModel +from airflow.providers.standard import example_dags as standard_example_dags from airflow.sdk import DAG, BaseOperator from tests_common.pytest_plugin import AIRFLOW_ROOT_PATH @@ -56,7 +58,7 @@ pytestmark = pytest.mark.db_test -example_dags_folder = AIRFLOW_ROOT_PATH / "airflow-core" / "src" / "airflow" / "example_dags" / "standard" +standard_example_dags_folder = Path(standard_example_dags.__file__).parent PY311 = sys.version_info >= (3, 11) PY313 = sys.version_info >= (3, 13) @@ -345,9 +347,9 @@ def test_get_existing_dag(self, tmp_path): """ Test that we're able to parse some example DAGs and retrieve them """ - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=True, bundle_name="test_bundle") + dagbag = DagBag(dag_folder=standard_example_dags_folder, include_examples=False, bundle_name="test_bundle") - some_expected_dag_ids = ["example_bash_operator", "example_branch_operator"] + some_expected_dag_ids = ["example_bash_operator", "example_python_operator"] for dag_id in some_expected_dag_ids: dag = 
dagbag.get_dag(dag_id) @@ -733,7 +735,7 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): _TestDagBag.process_file_calls += 1 super().process_file(filepath, only_if_updated, safe_mode) - dagbag = _TestDagBag(include_examples=True) + dagbag = _TestDagBag(dag_folder=standard_example_dags_folder) dagbag.process_file_calls # Should not call process_file again, since it's already loaded during init. @@ -745,9 +747,9 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): ("file_to_load", "expected"), ( pytest.param( - pathlib.Path(example_dags_folder) / "example_bash_operator.py", + pathlib.Path(standard_example_dags_folder) / "example_bash_operator.py", { - "example_bash_operator": f"{example_dags_folder.relative_to(AIRFLOW_ROOT_PATH) / 'example_bash_operator.py'}" + "example_bash_operator": f"{standard_example_dags_folder.relative_to(AIRFLOW_ROOT_PATH) / 'example_bash_operator.py'}" }, id="example_bash_operator", ), @@ -809,7 +811,7 @@ def test_refresh_py_dag(self, mock_dagmodel, tmp_path): Test that we can refresh an ordinary .py DAG """ dag_id = "example_bash_operator" - fileloc = str(example_dags_folder / "example_bash_operator.py") + fileloc = str(standard_example_dags_folder / "example_bash_operator.py") mock_dagmodel.return_value = DagModel() mock_dagmodel.return_value.last_expired = datetime.max.replace(tzinfo=timezone.utc) @@ -823,7 +825,7 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): _TestDagBag.process_file_calls += 1 return super().process_file(filepath, only_if_updated, safe_mode) - dagbag = _TestDagBag(dag_folder=os.fspath(tmp_path), include_examples=True) + dagbag = _TestDagBag(dag_folder=standard_example_dags_folder, include_examples=False) assert dagbag.process_file_calls == 1 dag = dagbag.get_dag(dag_id) diff --git a/airflow-core/tests/unit/models/test_dagcode.py b/airflow-core/tests/unit/models/test_dagcode.py index 6bb6e412442b0..5fdade754ee5c 100644 --- 
a/airflow-core/tests/unit/models/test_dagcode.py +++ b/airflow-core/tests/unit/models/test_dagcode.py @@ -77,10 +77,10 @@ def teardown_method(self): def _write_two_example_dags(self, session): example_dags = make_example_dags(example_dags_module) - bash_dag = example_dags["example_bash_operator"] - sync_dag_to_db(bash_dag, session=session) - dag_version = DagVersion.get_latest_version("example_bash_operator") - x = DagCode(dag_version, bash_dag.fileloc) + xcomargs_dag = example_dags["example_xcom_args"] + sync_dag_to_db(xcomargs_dag, session=session) + dag_version = DagVersion.get_latest_version("example_xcom_args") + x = DagCode(dag_version, xcomargs_dag.fileloc) session.add(x) session.commit() xcom_dag = example_dags["example_xcom"] @@ -89,7 +89,7 @@ def _write_two_example_dags(self, session): x = DagCode(dag_version, xcom_dag.fileloc) session.add(x) session.commit() - return [bash_dag, xcom_dag] + return [xcomargs_dag, xcom_dag] def _write_example_dags(self): example_dags = make_example_dags(example_dags_module) @@ -133,7 +133,9 @@ def test_code_can_be_read_when_no_access_to_file(self, testing_dag_bundle): Test that code can be retrieved from DB when you do not have access to Code file. Source Code should at least exist in one of DB or File. 
""" - example_dag = make_example_dags(example_dags_module).get("example_bash_operator") + from airflow.providers.standard import example_dags + + example_dag = make_example_dags(example_dags).get("example_bash_operator") sync_dag_to_db(example_dag) # Mock that there is no access to the Dag File @@ -146,7 +148,9 @@ def test_code_can_be_read_when_no_access_to_file(self, testing_dag_bundle): def test_db_code_created_on_serdag_change(self, session, testing_dag_bundle): """Test new DagCode is created in DB when ser dag is changed""" - example_dag = make_example_dags(example_dags_module).get("example_bash_operator") + from airflow.providers.standard import example_dags + + example_dag = make_example_dags(example_dags).get("example_bash_operator") sync_dag_to_db(example_dag, session=session).create_dagrun( run_id="test1", run_after=pendulum.datetime(2025, 1, 1, tz="UTC"), diff --git a/airflow-core/tests/unit/models/test_serialized_dag.py b/airflow-core/tests/unit/models/test_serialized_dag.py index 54438f8e82fc9..0f42d8efbdee8 100644 --- a/airflow-core/tests/unit/models/test_serialized_dag.py +++ b/airflow-core/tests/unit/models/test_serialized_dag.py @@ -158,14 +158,14 @@ def my_callable2(): def test_serialized_dag_is_updated_if_dag_is_changed(self, testing_dag_bundle): """Test Serialized DAG is updated if DAG is changed""" example_dags = make_example_dags(example_dags_module) - example_bash_op_dag = example_dags.get("example_bash_operator") + example_params_trigger_ui = example_dags.get("example_params_trigger_ui") dag_updated = SDM.write_dag( - dag=LazyDeserializedDAG.from_dag(example_bash_op_dag), + dag=LazyDeserializedDAG.from_dag(example_params_trigger_ui), bundle_name="testing", ) assert dag_updated is True - s_dag = SDM.get(example_bash_op_dag.dag_id) + s_dag = SDM.get(example_params_trigger_ui.dag_id) s_dag.dag.create_dagrun( run_id="test1", run_after=pendulum.datetime(2025, 1, 1, tz="UTC"), @@ -177,28 +177,28 @@ def 
test_serialized_dag_is_updated_if_dag_is_changed(self, testing_dag_bundle): # Test that if DAG is not changed, Serialized DAG is not re-written and last_updated # column is not updated dag_updated = SDM.write_dag( - dag=LazyDeserializedDAG.from_dag(example_bash_op_dag), + dag=LazyDeserializedDAG.from_dag(example_params_trigger_ui), bundle_name="testing", ) - s_dag_1 = SDM.get(example_bash_op_dag.dag_id) + s_dag_1 = SDM.get(example_params_trigger_ui.dag_id) assert s_dag_1.dag_hash == s_dag.dag_hash assert s_dag.created_at == s_dag_1.created_at assert dag_updated is False # Update DAG - example_bash_op_dag.tags.add("new_tag") - assert example_bash_op_dag.tags == {"example", "example2", "new_tag"} + example_params_trigger_ui.tags.add("new_tag") + assert example_params_trigger_ui.tags == {"example", "new_tag", "params"} dag_updated = SDM.write_dag( - dag=LazyDeserializedDAG.from_dag(example_bash_op_dag), + dag=LazyDeserializedDAG.from_dag(example_params_trigger_ui), bundle_name="testing", ) - s_dag_2 = SDM.get(example_bash_op_dag.dag_id) + s_dag_2 = SDM.get(example_params_trigger_ui.dag_id) assert s_dag.created_at != s_dag_2.created_at assert s_dag.dag_hash != s_dag_2.dag_hash - assert s_dag_2.data["dag"]["tags"] == ["example", "example2", "new_tag"] + assert s_dag_2.data["dag"]["tags"] == ["example", "new_tag", "params"] assert dag_updated is True def test_read_dags(self): @@ -217,7 +217,7 @@ def test_read_all_dags_only_picks_the_latest_serdags(self, session): serialized_dags = SDM.read_all_dags() assert len(example_dags) == len(serialized_dags) - dag = example_dags.get("example_bash_operator") + dag = example_dags.get("example_params_trigger_ui") create_scheduler_dag(dag=dag).create_dagrun( run_id="test1", run_after=pendulum.datetime(2025, 1, 1, tz="UTC"), diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index cbfb0b377ae71..34c6d0d72ccab 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ 
b/devel-common/src/tests_common/test_utils/db.py @@ -206,6 +206,10 @@ def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): except ImportError: from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] + for bundle in DagBundlesManager().get_all_dag_bundles(): + dagbag = DagBag(dag_folder=bundle.path, include_examples=include_examples) + sync_bag_to_db(dagbag, bundle.name, None, session=session) + sync_bag_to_db(dagbag, "dags-folder", None, session=session) elif AIRFLOW_V_3_0_PLUS: dagbag.sync_to_db("dags-folder", None, session) # type: ignore[attr-defined] From a1d8210eb26542f774042e1042c7dc39b15ce616 Mon Sep 17 00:00:00 2001 From: Bhavani Ravi Date: Mon, 27 Oct 2025 14:17:20 +0530 Subject: [PATCH 02/17] fix: failing testcases to load examples from dagbundle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: André Ahlert --- .../routes/public/test_dag_parsing.py | 4 ++-- .../unit/cli/commands/test_dag_command.py | 23 ++++++++----------- .../unit/cli/commands/test_task_command.py | 3 ++- .../src/tests_common/test_utils/db.py | 3 +-- .../standard/operators/test_trigger_dagrun.py | 2 +- 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_parsing.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_parsing.py index d2abb5e672a5a..763e5e88669e2 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_parsing.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_parsing.py @@ -56,7 +56,7 @@ def test_201_and_400_requests(self, url_safe_serializer, session, test_client): assert response.status_code == 201 parsing_requests = session.scalars(select(DagPriorityParsingRequest)).all() assert len(parsing_requests) == 1 - assert parsing_requests[0].bundle_name == "dags-folder" + assert parsing_requests[0].bundle_name == 
"example_dags" assert parsing_requests[0].relative_fileloc == test_dag.relative_fileloc _check_last_log(session, dag_id=None, event="reparse_dag_file", logical_date=None) @@ -65,7 +65,7 @@ def test_201_and_400_requests(self, url_safe_serializer, session, test_client): assert response.status_code == 409 parsing_requests = session.scalars(select(DagPriorityParsingRequest)).all() assert len(parsing_requests) == 1 - assert parsing_requests[0].bundle_name == "dags-folder" + assert parsing_requests[0].bundle_name == "example_dags" assert parsing_requests[0].relative_fileloc == test_dag.relative_fileloc _check_last_log(session, dag_id=None, event="reparse_dag_file", logical_date=None) diff --git a/airflow-core/tests/unit/cli/commands/test_dag_command.py b/airflow-core/tests/unit/cli/commands/test_dag_command.py index 0ec4bb931aa53..e73f2eebbaa95 100644 --- a/airflow-core/tests/unit/cli/commands/test_dag_command.py +++ b/airflow-core/tests/unit/cli/commands/test_dag_command.py @@ -81,7 +81,8 @@ class TestCliDags: @classmethod def setup_class(cls): - parse_and_sync_to_db(os.devnull, include_examples=True) + with conf_vars({("core", "load_examples"): "True"}): + parse_and_sync_to_db(os.devnull, include_examples=True) cls.parser = cli_parser.get_parser() @classmethod @@ -270,9 +271,8 @@ def test_next_execution(self, dag_id, delta, schedule, catchup, first, second, t # Rebuild Test DB for other tests clear_db_dags() - parse_and_sync_to_db(os.devnull, include_examples=True) + self.setup_class() - @conf_vars({("core", "load_examples"): "true"}) def test_cli_report(self, stdout_capture): args = self.parser.parse_args(["dags", "report", "--output", "json"]) with stdout_capture as temp_stdout: @@ -283,7 +283,6 @@ def test_cli_report(self, stdout_capture): assert any(item["file"].endswith("example_complex.py") for item in data) assert any("example_complex" in item["dags"] for item in data) - @conf_vars({("core", "load_examples"): "true"}) def test_cli_get_dag_details(self, 
stdout_capture): args = self.parser.parse_args(["dags", "details", "example_complex", "--output", "yaml"]) with stdout_capture as temp_stdout: @@ -300,7 +299,6 @@ def test_cli_get_dag_details(self, stdout_capture): for value in dag_details_values: assert value in out - @conf_vars({("core", "load_examples"): "true"}) def test_cli_list_dags(self, stdout_capture): args = self.parser.parse_args(["dags", "list", "--output", "json"]) with stdout_capture as temp_stdout: @@ -311,11 +309,12 @@ def test_cli_list_dags(self, stdout_capture): assert key in dag_list[0] assert any("airflow/example_dags/example_complex.py" in d["fileloc"] for d in dag_list) - @conf_vars({("core", "load_examples"): "true"}) def test_cli_list_local_dags(self, stdout_capture): # Clear the database clear_db_dags() - args = self.parser.parse_args(["dags", "list", "--output", "json", "--local"]) + args = self.parser.parse_args( + ["dags", "list", "--output", "json", "--local", "--bundle-name", "example_dags"] + ) with stdout_capture as temp_stdout: dag_command.dag_list_dags(args) out = temp_stdout.getvalue() @@ -324,7 +323,7 @@ def test_cli_list_local_dags(self, stdout_capture): assert key in dag_list[0] assert any("airflow/example_dags/example_complex.py" in d["fileloc"] for d in dag_list) # Rebuild Test DB for other tests - parse_and_sync_to_db(os.devnull, include_examples=True) + self.setup_class() @conf_vars({("core", "load_examples"): "false"}) def test_cli_list_local_dags_with_bundle_name(self, configure_testing_dag_bundle, stdout_capture): @@ -345,9 +344,8 @@ def test_cli_list_local_dags_with_bundle_name(self, configure_testing_dag_bundle str(TEST_DAGS_FOLDER / "test_example_bash_operator.py") in d["fileloc"] for d in dag_list ) # Rebuild Test DB for other tests - parse_and_sync_to_db(os.devnull, include_examples=True) + self.setup_class() - @conf_vars({("core", "load_examples"): "true"}) def test_cli_list_dags_custom_cols(self, stdout_capture): args = self.parser.parse_args( ["dags", "list", 
"--output", "json", "--columns", "dag_id,last_parsed_time"] @@ -361,7 +359,6 @@ def test_cli_list_dags_custom_cols(self, stdout_capture): for key in ["fileloc", "owners", "is_paused"]: assert key not in dag_list[0] - @conf_vars({("core", "load_examples"): "true"}) def test_cli_list_dags_invalid_cols(self, stderr_capture): args = self.parser.parse_args(["dags", "list", "--output", "json", "--columns", "dag_id,invalid_col"]) with stderr_capture as temp_stderr: @@ -405,9 +402,8 @@ def test_cli_list_dags_prints_local_import_errors( assert "Failed to load all files." in out # Rebuild Test DB for other tests - parse_and_sync_to_db(os.devnull, include_examples=True) + self.setup_class() - @conf_vars({("core", "load_examples"): "true"}) @mock.patch("airflow.models.DagModel.get_dagmodel") def test_list_dags_none_get_dagmodel(self, mock_get_dagmodel, stdout_capture): mock_get_dagmodel.return_value = None @@ -420,7 +416,6 @@ def test_list_dags_none_get_dagmodel(self, mock_get_dagmodel, stdout_capture): assert key in dag_list[0] assert any("airflow/example_dags/example_complex.py" in d["fileloc"] for d in dag_list) - @conf_vars({("core", "load_examples"): "true"}) def test_dagbag_dag_col(self, session): dagbag = DBDagBag() dag_details = dag_command._get_dagbag_dag_details( diff --git a/airflow-core/tests/unit/cli/commands/test_task_command.py b/airflow-core/tests/unit/cli/commands/test_task_command.py index b66384d5ad426..72182a1692c38 100644 --- a/airflow-core/tests/unit/cli/commands/test_task_command.py +++ b/airflow-core/tests/unit/cli/commands/test_task_command.py @@ -442,7 +442,8 @@ def test_task_state(self): ) def test_task_states_for_dag_run(self): - dag2 = DagBag().dags["example_python_operator"] + from airflow.providers.standard.example_dags.example_python_operator import dag as dag2 + lazy_deserialized_dag2 = LazyDeserializedDAG.from_dag(dag2) SerializedDagModel.write_dag(lazy_deserialized_dag2, bundle_name="testing") diff --git 
a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index 34c6d0d72ccab..4869370cbb90a 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -205,12 +205,11 @@ def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): from airflow.dag_processing.dagbag import sync_bag_to_db except ImportError: from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] - + sync_bag_to_db(dagbag, "dags-folder", None, session=session) for bundle in DagBundlesManager().get_all_dag_bundles(): dagbag = DagBag(dag_folder=bundle.path, include_examples=include_examples) sync_bag_to_db(dagbag, bundle.name, None, session=session) - sync_bag_to_db(dagbag, "dags-folder", None, session=session) elif AIRFLOW_V_3_0_PLUS: dagbag.sync_to_db("dags-folder", None, session) # type: ignore[attr-defined] else: diff --git a/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py b/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py index bcc5ad6153c6e..cefba46526878 100644 --- a/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py +++ b/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py @@ -101,7 +101,7 @@ def teardown_method(self): if AIRFLOW_V_3_0_PLUS: from airflow.models.dagbundle import DagBundleModel - session.execute(delete(DagBundleModel)) + session.query(DagBundleModel).filter_by(name="test_bundle").delete(synchronize_session=False) session.commit() @pytest.mark.skipif(not AIRFLOW_V_3_0_PLUS, reason="Implementation is different for Airflow 2 & 3") From 689f5028f39b87e5ecb2ef1cc59c5d34c402ba50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Thu, 30 Apr 2026 09:49:08 -0300 Subject: [PATCH 03/17] Resolve provider example DAGs via ProvidersManager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the 
directory walk over airflow.providers.__path__ with a lookup based on ProvidersManager. The previous approach silently skipped: - nested providers like apache-airflow-providers-common-sql, whose module path is airflow.providers.common.sql (one level deeper); - providers installed outside the airflow.providers namespace package, since they are not visible via os.listdir. The new implementation iterates over the providers registered through the apache_airflow_provider entry point, imports each provider module and adds its example_dags folder when present. Bundle names are now keyed on the canonical package name to keep them unique and stable across deployments. Signed-off-by: André Ahlert --- .../airflow/dag_processing/bundles/manager.py | 62 ++++++++++++++----- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index f23fad633f3c3..5a43c3ae50902 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -108,21 +108,55 @@ def _add_example_dag_bundle(bundle_config_list: list[_ExternalBundleConfig]): def _add_provider_example_dags_to_bundle(bundle_config_list: list[_ExternalBundleConfig]): - from airflow import providers - - for provider_path in providers.__path__: - for name in os.listdir(provider_path): - example_dag_folder = os.path.join(provider_path, name, "example_dags") - if os.path.isdir(example_dag_folder): - bundle_config_list.append( - _ExternalBundleConfig( - name=f"airflow-provider-{name}-example-dags", - classpath="airflow.dag_processing.bundles.local.LocalDagBundle", - kwargs={ - "path": example_dag_folder, - }, - ) + """ + Add an ``example_dags`` folder of every installed provider as a bundle. 
+ + Provider locations are resolved through ``ProvidersManager`` instead of + walking ``airflow.providers.__path__`` so that: + + - nested providers (e.g. ``apache-airflow-providers-common-sql`` whose + module path is ``airflow.providers.common.sql``) are discovered; + - providers installed outside the ``airflow.providers`` namespace package + are discovered via their entry point. + """ + import importlib + import logging + + from airflow.providers_manager import ProvidersManager + + log = logging.getLogger(__name__) + seen: set[str] = set() + + for package_name in ProvidersManager().providers: + # apache-airflow-providers-foo-bar -> airflow.providers.foo.bar + if not package_name.startswith("apache-airflow-providers-"): + module_name = package_name.replace("-", "_") + else: + suffix = package_name[len("apache-airflow-providers-") :] + module_name = "airflow.providers." + suffix.replace("-", ".") + try: + module = importlib.import_module(module_name) + except ImportError: + log.debug("Could not import provider module %s for example DAG discovery", module_name) + continue + + for module_path in getattr(module, "__path__", []): + example_dag_folder = os.path.join(module_path, "example_dags") + if not os.path.isdir(example_dag_folder): + continue + bundle_name = f"airflow-provider-{package_name}-example-dags" + if bundle_name in seen: + continue + seen.add(bundle_name) + bundle_config_list.append( + _ExternalBundleConfig( + name=bundle_name, + classpath="airflow.dag_processing.bundles.local.LocalDagBundle", + kwargs={ + "path": example_dag_folder, + }, ) + ) def _is_safe_bundle_url(url: str) -> bool: From 4586cc5319cff97d95ae6c6268357c967e48be86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Thu, 30 Apr 2026 10:05:54 -0300 Subject: [PATCH 04/17] Tests: relax test_get_all_bundle_names assertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The set of provider example DAG bundles depends on which 
providers expose an example_dags folder, which is environment specific. Pin only the built-in bundles and assert the prefix of any extra entry. Signed-off-by: André Ahlert --- .../dag_processing/bundles/test_dag_bundle_manager.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py index 2c00e8aa25907..9bae2c4acbafb 100644 --- a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py +++ b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py @@ -466,4 +466,13 @@ def test_multiple_bundles_one_fails(clear_db, session): def test_get_all_bundle_names(): - assert DagBundlesManager().get_all_bundle_names() == ["dags-folder", "example_dags"] + bundle_names = DagBundlesManager().get_all_bundle_names() + # Built-in bundles are always present. + assert "dags-folder" in bundle_names + assert "example_dags" in bundle_names + # Any other bundle exposed here comes from a provider's example_dags + # folder discovered via ProvidersManager. Their presence depends on + # which providers are installed in the environment, so we only check + # the naming prefix instead of pinning an exact list. 
+ extra = [n for n in bundle_names if n not in {"dags-folder", "example_dags"}] + assert all(n.startswith("airflow-provider-") for n in extra) From db1057e07df6f0fb2439b09fecdab9257e2a1345 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Thu, 30 Apr 2026 10:27:41 -0300 Subject: [PATCH 05/17] Address review feedback on example DAG loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply remaining suggestions from the original review of the closed PR #57320: - bundles/manager.py: document that sync_bundles_to_db only reconciles bundle metadata and does not parse or write DAG rows, clarifying the signature confusion raised by @bhavaniravi during review. - tests/unit/dag_processing/test_dagbag.py: stop importing the standard provider's example_dags module at collection time. Move the import into a small helper plus a pytest fixture so tests that need the folder request it explicitly and the module remains collectable when the standard provider is not yet importable. The single parametrize case that referenced the folder now passes a relative file name and resolves the absolute path inside the test. - tests/unit/cli/commands/test_task_command.py: build a minimal DAG inline in test_task_states_for_dag_run instead of importing one from the standard provider's example_dags. The test only checks CLI behaviour around a known dag_id/task_id, so reproducing the name and a single task is enough to keep the core test decoupled from the standard provider's example DAGs. 
Signed-off-by: André Ahlert --- .../airflow/dag_processing/bundles/manager.py | 9 ++++ .../unit/cli/commands/test_task_command.py | 14 +++++- .../tests/unit/dag_processing/test_dagbag.py | 50 +++++++++++++------ 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 5a43c3ae50902..199af8f52c5db 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -264,6 +264,15 @@ def parse_config(self) -> None: @provide_session def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: + """ + Persist the configured DAG bundles into ``DagBundleModel`` rows. + + This only reconciles bundle metadata, not the DAGs contained in them. + Parsing each bundle's DAG files and writing the resulting + ``DagModel`` / ``SerializedDagModel`` rows is the responsibility of + ``DagBag`` plus ``sync_bag_to_db`` (or, in production, the DAG + processor); calling this method does not trigger that work. + """ self.log.debug("Syncing DAG bundles to the database") def _extract_and_sign_template(bundle_name: str) -> tuple[str | None, dict]: diff --git a/airflow-core/tests/unit/cli/commands/test_task_command.py b/airflow-core/tests/unit/cli/commands/test_task_command.py index 72182a1692c38..da5c9896c3bcf 100644 --- a/airflow-core/tests/unit/cli/commands/test_task_command.py +++ b/airflow-core/tests/unit/cli/commands/test_task_command.py @@ -442,7 +442,19 @@ def test_task_state(self): ) def test_task_states_for_dag_run(self): - from airflow.providers.standard.example_dags.example_python_operator import dag as dag2 + # Build a minimal DAG inline rather than importing one from the + # standard provider's example_dags. 
The test only asserts CLI + # behaviour around a known dag_id/task_id pair, so reproducing the + # name and a single task is enough and keeps this core test + # decoupled from the standard provider's example DAGs. + from airflow.sdk import DAG + + with DAG( + dag_id="example_python_operator", + schedule=None, + start_date=timezone.datetime(2021, 1, 1), + ) as dag2: + BashOperator(task_id="print_the_context", bash_command="echo hello") lazy_deserialized_dag2 = LazyDeserializedDAG.from_dag(dag2) diff --git a/airflow-core/tests/unit/dag_processing/test_dagbag.py b/airflow-core/tests/unit/dag_processing/test_dagbag.py index 1951937b720e7..49fa6f8ebe950 100644 --- a/airflow-core/tests/unit/dag_processing/test_dagbag.py +++ b/airflow-core/tests/unit/dag_processing/test_dagbag.py @@ -47,7 +47,6 @@ from airflow.models.dag import DagModel from airflow.models.dagwarning import DagWarning, DagWarningType from airflow.models.serialized_dag import SerializedDagModel -from airflow.providers.standard import example_dags as standard_example_dags from airflow.sdk import DAG, BaseOperator from tests_common.pytest_plugin import AIRFLOW_ROOT_PATH @@ -58,7 +57,25 @@ pytestmark = pytest.mark.db_test -standard_example_dags_folder = Path(standard_example_dags.__file__).parent + +def _standard_example_dags_folder() -> Path: + """ + Return the filesystem path of the standard provider's ``example_dags``. + + Importing the provider lazily keeps the test module collectable in + environments where the standard provider is not yet installed. The + tests that actually need the folder will fail explicitly when the + provider is missing, instead of breaking pytest collection. 
+ """ + from airflow.providers.standard import example_dags + + return Path(example_dags.__file__).parent + + +@pytest.fixture +def standard_example_dags_folder() -> Path: + return _standard_example_dags_folder() + PY311 = sys.version_info >= (3, 11) PY313 = sys.version_info >= (3, 13) @@ -343,11 +360,13 @@ def test_dagbag_with_bundle_name(self, tmp_path): dagbag2 = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) assert dagbag2.bundle_name is None - def test_get_existing_dag(self, tmp_path): + def test_get_existing_dag(self, tmp_path, standard_example_dags_folder): """ Test that we're able to parse some example DAGs and retrieve them """ - dagbag = DagBag(dag_folder=standard_example_dags_folder, include_examples=False, bundle_name="test_bundle") + dagbag = DagBag( + dag_folder=standard_example_dags_folder, include_examples=False, bundle_name="test_bundle" + ) some_expected_dag_ids = ["example_bash_operator", "example_python_operator"] @@ -716,7 +735,7 @@ def test_process_file_valid_param_check(self, tmp_path): assert len(dagbag.dags) == len(valid_dag_files) @patch.object(DagModel, "get_current") - def test_get_dag_without_refresh(self, mock_dagmodel): + def test_get_dag_without_refresh(self, mock_dagmodel, standard_example_dags_folder): """ Test that, once a DAG is loaded, it doesn't get refreshed again if it hasn't been expired. 
@@ -744,25 +763,24 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): assert dagbag.process_file_calls == 1 @pytest.mark.parametrize( - ("file_to_load", "expected"), + ("file_name", "expected_dag_id"), ( pytest.param( - pathlib.Path(standard_example_dags_folder) / "example_bash_operator.py", - { - "example_bash_operator": f"{standard_example_dags_folder.relative_to(AIRFLOW_ROOT_PATH) / 'example_bash_operator.py'}" - }, + "example_bash_operator.py", + "example_bash_operator", id="example_bash_operator", ), ), ) - def test_get_dag_registration(self, file_to_load, expected): + def test_get_dag_registration(self, file_name, expected_dag_id, standard_example_dags_folder): pytest.importorskip("system.standard") + file_to_load = standard_example_dags_folder / file_name + expected_path = standard_example_dags_folder.relative_to(AIRFLOW_ROOT_PATH) / file_name dagbag = DagBag(dag_folder=os.devnull, include_examples=False) dagbag.process_file(os.fspath(file_to_load)) - for dag_id, path in expected.items(): - dag = dagbag.get_dag(dag_id) - assert dag, f"{dag_id} was bagged" - assert dag.fileloc.endswith(path) + dag = dagbag.get_dag(expected_dag_id) + assert dag, f"{expected_dag_id} was bagged" + assert dag.fileloc.endswith(str(expected_path)) @pytest.mark.parametrize( ("expected"), @@ -806,7 +824,7 @@ def test_dag_registration_with_failure_zipped(self, zip_with_valid_dag_and_dup_t assert [dag.dag_id for dag in found] == ["test_example_bash_operator"] @patch.object(DagModel, "get_current") - def test_refresh_py_dag(self, mock_dagmodel, tmp_path): + def test_refresh_py_dag(self, mock_dagmodel, tmp_path, standard_example_dags_folder): """ Test that we can refresh an ordinary .py DAG """ From 88fe049fd55c42f46a3400e38d2ee2b6e3be0b3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Thu, 30 Apr 2026 10:49:34 -0300 Subject: [PATCH 06/17] Tests: use SQLAlchemy 2.0 delete() in trigger_dagrun teardown MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The teardown deletes a single DagBundleModel row keyed by name. Use session.execute(delete(...).where(...)) instead of the deprecated session.query(...).filter_by(...).delete() form so prek's prevent-deprecated-sqlalchemy-usage hook stays clean. Signed-off-by: André Ahlert --- .../tests/unit/standard/operators/test_trigger_dagrun.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py b/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py index cefba46526878..e99acd0575d79 100644 --- a/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py +++ b/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py @@ -101,7 +101,7 @@ def teardown_method(self): if AIRFLOW_V_3_0_PLUS: from airflow.models.dagbundle import DagBundleModel - session.query(DagBundleModel).filter_by(name="test_bundle").delete(synchronize_session=False) + session.execute(delete(DagBundleModel).where(DagBundleModel.name == "test_bundle")) session.commit() @pytest.mark.skipif(not AIRFLOW_V_3_0_PLUS, reason="Implementation is different for Airflow 2 & 3") From 06f05b5db200a9d9ce9ee9273c594dad2d0c0441 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 1 May 2026 11:32:47 -0300 Subject: [PATCH 07/17] Tests: update bundle_name for example_python_operator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After moving the standard provider's example DAGs into their own bundle, example_python_operator no longer lives in the dags-folder bundle. Update the 24 expected payloads in test_task_instances.py that asserted bundle_name='dags-folder' to the new airflow-provider-apache-airflow-providers-standard-example-dags bundle name. The lone sync_bag_to_db('dags-folder', ...) call in this file is unrelated; it registers a synthetic dag built by dag_maker. 
Signed-off-by: André Ahlert --- .../routes/public/test_task_instances.py | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py index 9703b66550514..7c7221d353d2f 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py @@ -198,7 +198,7 @@ def test_should_respond_200(self, test_client, session): assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -356,7 +356,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, test_client, sessi assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -420,7 +420,7 @@ def test_should_respond_200_with_task_state_in_removed(self, test_client, sessio assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -476,7 +476,7 @@ def test_should_respond_200_task_instance_with_rendered(self, test_client, sessi assert response.json() == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": 
"airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -596,7 +596,7 @@ def test_should_respond_200_mapped_task_instance_with_rtif(self, test_client, se assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2469,7 +2469,7 @@ def test_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2515,7 +2515,7 @@ def test_should_respond_200_with_different_try_numbers(self, test_client, try_nu "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2592,7 +2592,7 @@ def test_should_respond_200_with_mapped_task_at_different_try_numbers( "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2664,7 +2664,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, test_client, sessi "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": 
"airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2711,7 +2711,7 @@ def test_should_respond_200_with_task_state_in_removed(self, test_client, sessio "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -3517,7 +3517,7 @@ def test_should_respond_200_with_dag_run_id( "dag_id": "example_python_operator", "dag_display_name": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4009,7 +4009,7 @@ def test_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4046,7 +4046,7 @@ def test_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][1]["dag_version"]["created_at"], @@ -4117,7 +4117,7 @@ def test_ti_in_retry_state_not_returned(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": 
"airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4200,7 +4200,7 @@ def test_mapped_task_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4237,7 +4237,7 @@ def test_mapped_task_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][1]["dag_version"]["created_at"], @@ -4439,7 +4439,7 @@ def test_should_call_mocked_api(self, mock_set_ti_state, test_client, session): "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4715,7 +4715,7 @@ def test_should_raise_422_for_invalid_task_instance_state(self, payload, expecte "dag_id": "example_python_operator", "dag_display_name": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": mock.ANY, @@ -4853,7 +4853,7 @@ def test_update_mask_set_note_should_respond_200( "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "dags-folder", + 
"bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4916,7 +4916,7 @@ def test_set_note_should_respond_200(self, test_client, session): "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4997,7 +4997,7 @@ def test_set_note_should_respond_200_mapped_task_with_rtif(self, test_client, se "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -5080,7 +5080,7 @@ def test_set_note_should_respond_200_mapped_task_summary_with_rtif(self, test_cl "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_ti["dag_version"]["created_at"], @@ -5196,7 +5196,7 @@ def test_should_call_mocked_api(self, mock_set_ti_state, test_client, session): "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -5484,7 +5484,7 @@ def test_should_raise_422_for_invalid_task_instance_state(self, payload, expecte "dag_id": 
"example_python_operator", "dag_display_name": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": mock.ANY, From 457e7cddc27a628d01166ad42081eb6255385df1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 1 May 2026 13:26:26 -0300 Subject: [PATCH 08/17] Tests: avoid double-loading example DAGs in parse_and_sync_to_db MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Airflow 3.1+, parse_and_sync_to_db iterates every registered DAG bundle (including example_dags and airflow-provider-*-example-dags) and syncs them with their own bundle name. The leading DagBag(dag_folder=folder, include_examples=True) was also pulling example DAGs into the dags-folder bundle, so each example DAG ended up registered under two different bundles. The duplicated rows then violated the (asset_id, dag_id) unique constraint on dag_schedule_asset_reference and broke unrelated tests (notably the fab provider tests under compat). Force include_examples=False on the 3.1+ path; the bundle loop is already responsible for loading example DAGs from their own bundles. The 2.x and 3.0 paths are untouched. 
Signed-off-by: André Ahlert --- devel-common/src/tests_common/test_utils/db.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index 4869370cbb90a..886ba600c69a8 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -199,20 +199,30 @@ def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): DagBundlesManager().sync_bundles_to_db(session=session) session.flush() - dagbag = DagBag(dag_folder=folder, include_examples=include_examples) if AIRFLOW_V_3_1_PLUS: try: from airflow.dag_processing.dagbag import sync_bag_to_db except ImportError: from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] + # On 3.1+, example DAGs are exposed as their own bundles + # (``example_dags`` for core, ``airflow-provider-*-example-dags`` + # for each provider that ships an ``example_dags`` folder). The + # bundle loop below already syncs every one of them, so the + # ``dags-folder`` DagBag must NOT pull example DAGs in too, + # otherwise the same DAG gets registered under two bundles and + # ``dag_schedule_asset_reference`` rows collide on the unique + # ``(asset_id, dag_id)`` constraint. 
+ dagbag = DagBag(dag_folder=folder, include_examples=False) sync_bag_to_db(dagbag, "dags-folder", None, session=session) for bundle in DagBundlesManager().get_all_dag_bundles(): - dagbag = DagBag(dag_folder=bundle.path, include_examples=include_examples) - sync_bag_to_db(dagbag, bundle.name, None, session=session) + bundle_dagbag = DagBag(dag_folder=bundle.path, include_examples=False) + sync_bag_to_db(bundle_dagbag, bundle.name, None, session=session) elif AIRFLOW_V_3_0_PLUS: + dagbag = DagBag(dag_folder=folder, include_examples=include_examples) dagbag.sync_to_db("dags-folder", None, session) # type: ignore[attr-defined] else: + dagbag = DagBag(dag_folder=folder, include_examples=include_examples) dagbag.sync_to_db(session=session) # type: ignore[attr-defined] return dagbag From 84380f0424fd74f5bbff0bdcbf101ff58a664e35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 1 May 2026 16:22:58 -0300 Subject: [PATCH 09/17] Trigger CI rerun MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: André Ahlert From aea523be895d76d3a249bef41663e69400be5c04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Sat, 2 May 2026 03:07:21 -0300 Subject: [PATCH 10/17] Update devel-common/src/tests_common/test_utils/db.py Co-authored-by: Jens Scheffler <95105677+jscheffl@users.noreply.github.com> --- devel-common/src/tests_common/test_utils/db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index 886ba600c69a8..cac2ea9d8fc0b 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -204,7 +204,7 @@ def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): from airflow.dag_processing.dagbag import sync_bag_to_db except ImportError: from airflow.models.dagbag import sync_bag_to_db # type: 
ignore[no-redef, attribute-defined] - # On 3.1+, example DAGs are exposed as their own bundles + # On 3.3+, example DAGs are exposed as their own bundles # (``example_dags`` for core, ``airflow-provider-*-example-dags`` # for each provider that ships an ``example_dags`` folder). The # bundle loop below already syncs every one of them, so the From 4cf8023629ca741e78a5e525a51c5070b54d849f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Sat, 2 May 2026 03:07:45 -0300 Subject: [PATCH 11/17] Update airflow-core/src/airflow/dag_processing/bundles/manager.py Co-authored-by: Jens Scheffler <95105677+jscheffl@users.noreply.github.com> --- airflow-core/src/airflow/dag_processing/bundles/manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 199af8f52c5db..da54d59ecf9a7 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -137,7 +137,7 @@ def _add_provider_example_dags_to_bundle(bundle_config_list: list[_ExternalBundl try: module = importlib.import_module(module_name) except ImportError: - log.debug("Could not import provider module %s for example DAG discovery", module_name) + log.warning("Could not import provider module %s for example DAG discovery", module_name) continue for module_path in getattr(module, "__path__", []): From 3c576499351be384e94c520d3d0aa571a82bd161 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Sun, 3 May 2026 10:00:53 -0300 Subject: [PATCH 12/17] Address review feedback on provider example DAG bundle discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Hoist `importlib`, `logging`, and `ProvidersManager` to module top in `dag_processing/bundles/manager.py` and add a module-level logger. 
- Reverse the `apache-airflow-providers-` prefix check so the canonical case reads first, per @jscheffl's nit. - Broaden the exception handler around `import_module` and the subsequent `__path__` access to `Exception` with `log.exception`, so a provider with a custom `__getattr__` can no longer crash config loading on the scheduler/api-server. - Switch the provider-example bundle dedup key from `bundle_name` to the resolved `example_dag_folder` and document the namespace-package scenario the guard exists for (multiple `airflow.providers.common.*` distributions sharing one namespace via `pkgutil.extend_path`). - Rename the per-provider bundle from `airflow-provider-{package}-example-dags` to `{package}-example-dags`, matching the package distribution name surfaced to users in REST API responses and `pip list`. - Emit a `DeprecationWarning` from `DagBag.collect_dags` and from `tests_common.test_utils.db.parse_and_sync_to_db` when callers pass `include_examples=True`, and update the docstrings to direct callers at the `[core] load_examples` configuration option. - Migrate the in-tree `parse_and_sync_to_db(..., include_examples=True)` callers to use `conf_vars({("core", "load_examples"): "true"})`. - Update `test_task_instances.py` bundle-name assertions and the `test_get_all_bundle_names` suffix check for the new format. - Add `airflow-core/newsfragments/66161.significant.rst` covering the user-visible REST-API bundle-name change. The package-name -> module-path heuristic is kept for now; replacing it with an authoritative field on `ProviderInfo` is tracked in #66305. 
Signed-off-by: André Ahlert --- .../newsfragments/66161.significant.rst | 58 +++++++++++++++++++ .../airflow/dag_processing/bundles/manager.py | 40 ++++++++----- .../src/airflow/dag_processing/dagbag.py | 22 ++++++- .../tests/unit/api_fastapi/conftest.py | 3 +- .../routes/public/test_task_instances.py | 48 +++++++-------- .../unit/cli/commands/test_asset_command.py | 4 +- .../cli/commands/test_backfill_command.py | 4 +- .../unit/cli/commands/test_dag_command.py | 2 +- .../unit/cli/commands/test_task_command.py | 3 +- .../bundles/test_dag_bundle_manager.py | 4 +- .../src/tests_common/test_utils/db.py | 25 +++++++- .../fab/tests/unit/fab/www/views/conftest.py | 3 +- 12 files changed, 165 insertions(+), 51 deletions(-) create mode 100644 airflow-core/newsfragments/66161.significant.rst diff --git a/airflow-core/newsfragments/66161.significant.rst b/airflow-core/newsfragments/66161.significant.rst new file mode 100644 index 0000000000000..e0722395aa4a4 --- /dev/null +++ b/airflow-core/newsfragments/66161.significant.rst @@ -0,0 +1,58 @@ +Provider example DAGs are exposed as dedicated bundles + +Example DAGs that ship with provider distributions are now discovered via +``ProvidersManager`` and registered as their own DAG bundles, one per +provider that ships an ``example_dags/`` folder. Bundle names follow the +shape ``apache-airflow-providers-<name>-example-dags`` (for +canonical Apache providers) or ``<package-name>-example-dags`` (for +third-party providers). The ``[core] load_examples`` configuration option +remains the single switch that controls whether any example bundles are +registered. + +**What changed:** + +- Example DAGs that previously came in under the implicit ``dags-folder`` + bundle are now persisted in ``DagBundleModel`` rows and emitted in REST + API responses (``GET /api/v2/dags/{dag_id}/dag-versions`` and the + ``bundle_name`` field on task-instance responses) under the new + per-provider bundle names.
+- Nested providers such as ``apache-airflow-providers-common-sql`` are + discovered correctly (previously they were missed because discovery + walked ``airflow.providers.__path__`` directly). + +**Behaviour changes:** + +- Clients filtering or tracking bundles by ``"dags-folder"`` for + previously-shipped example DAGs (e.g. ``example_python_operator``) need + to update to the new per-provider bundle names. The DAG identifiers + themselves are unchanged. + +**Deprecations:** + +- The ``include_examples`` parameter on ``DagBag.collect_dags`` and on + ``tests_common.test_utils.db.parse_and_sync_to_db`` is now back-compat + only. Passing ``include_examples=True`` emits a + :class:`DeprecationWarning`. Tests and external callers that need + example DAGs loaded should set the ``[core] load_examples`` config to + ``true`` (e.g. via ``conf_vars({("core", "load_examples"): "true"})``) + instead. + +* Types of change + + * [ ] Dag changes + * [x] Config changes + * [x] API changes + * [ ] CLI changes + * [x] Behaviour changes + * [ ] Plugin changes + * [ ] Dependency changes + * [x] Code interface changes + +* Migration rules needed + + * Update clients that filter REST API responses by ``bundle_name`` to + match the new per-provider bundle names for example DAGs. + * Replace ``include_examples=True`` calls to ``DagBag`` / + ``parse_and_sync_to_db`` with + ``conf_vars({("core", "load_examples"): "true"})`` (or equivalent + configuration) before the deprecated argument is removed. diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index da54d59ecf9a7..78c54266eda9f 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -16,6 +16,8 @@ # under the License. 
from __future__ import annotations +import importlib +import logging import os import warnings from typing import TYPE_CHECKING @@ -30,6 +32,7 @@ from airflow.exceptions import AirflowConfigException from airflow.models.dagbundle import DagBundleModel from airflow.models.team import Team +from airflow.providers_manager import ProvidersManager from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.session import NEW_SESSION, provide_session @@ -38,6 +41,8 @@ from sqlalchemy.orm import Session +log = logging.getLogger(__name__) + _example_dag_bundle_name = "example_dags" @@ -119,35 +124,38 @@ def _add_provider_example_dags_to_bundle(bundle_config_list: list[_ExternalBundl - providers installed outside the ``airflow.providers`` namespace package are discovered via their entry point. """ - import importlib - import logging - - from airflow.providers_manager import ProvidersManager - - log = logging.getLogger(__name__) + # Dedup on the resolved on-disk folder rather than the bundle name: distributions + # under ``airflow.providers.common.*`` use ``pkgutil.extend_path``, so when several + # ``common-*`` packages are installed ``airflow.providers.common.__path__`` has + # multiple entries and the inner loop iterates more than once. Path-based dedup + # only skips when the same folder is seen twice; distinct folders are preserved. seen: set[str] = set() for package_name in ProvidersManager().providers: - # apache-airflow-providers-foo-bar -> airflow.providers.foo.bar - if not package_name.startswith("apache-airflow-providers-"): - module_name = package_name.replace("-", "_") - else: + # Heuristic: derive the import path from the canonical + # ``apache-airflow-providers-*`` distribution name. Tracked as a follow-up + # to record the provider module path on ``ProviderInfo`` (see + # https://github.com/apache/airflow/issues/66305). 
+ if package_name.startswith("apache-airflow-providers-"): suffix = package_name[len("apache-airflow-providers-") :] module_name = "airflow.providers." + suffix.replace("-", ".") + else: + module_name = package_name.replace("-", "_") try: module = importlib.import_module(module_name) - except ImportError: - log.warning("Could not import provider module %s for example DAG discovery", module_name) + module_paths = list(getattr(module, "__path__", [])) + except Exception: + log.exception("Could not load provider module %s for example DAG discovery", module_name) continue - for module_path in getattr(module, "__path__", []): + for module_path in module_paths: example_dag_folder = os.path.join(module_path, "example_dags") if not os.path.isdir(example_dag_folder): continue - bundle_name = f"airflow-provider-{package_name}-example-dags" - if bundle_name in seen: + if example_dag_folder in seen: continue - seen.add(bundle_name) + seen.add(example_dag_folder) + bundle_name = f"{package_name}-example-dags" bundle_config_list.append( _ExternalBundleConfig( name=bundle_name, diff --git a/airflow-core/src/airflow/dag_processing/dagbag.py b/airflow-core/src/airflow/dag_processing/dagbag.py index cd58d10163596..75fe7e1ffe2a7 100644 --- a/airflow-core/src/airflow/dag_processing/dagbag.py +++ b/airflow-core/src/airflow/dag_processing/dagbag.py @@ -172,8 +172,12 @@ class DagBag(LoggingMixin): that one system can run multiple, independent settings sets. :param dag_folder: the folder to scan to find DAGs - :param include_examples: whether to include the examples that ship - with airflow or not + :param include_examples: back-compat-only on Airflow 3.1+. Example DAGs are + now exposed as dedicated bundles (``example_dags`` for core, + ``apache-airflow-providers-*-example-dags`` for each provider that + ships an ``example_dags`` folder), and the ``[core] load_examples`` + config controls whether those bundles are registered. 
Passing + ``include_examples=True`` here emits a :class:`DeprecationWarning`. :param safe_mode: when ``False``, scans all python modules for dags. When ``True`` uses heuristics (files containing ``DAG`` and ``airflow`` strings) to filter python modules to scan for dags. @@ -465,7 +469,21 @@ def collect_dags( **Note**: The patterns in ``.airflowignore`` are interpreted as either un-anchored regexes or gitignore-like glob expressions, depending on the ``DAG_IGNORE_FILE_SYNTAX`` configuration parameter. + + ``include_examples`` is a back-compat-only argument on Airflow 3.1+; + example DAGs are loaded via dedicated bundles gated by the + ``[core] load_examples`` configuration option. Passing + ``include_examples=True`` emits a :class:`DeprecationWarning`. """ + if include_examples is True: + warnings.warn( + "include_examples=True is deprecated for DagBag.collect_dags. " + "Example DAGs are now loaded via dedicated bundles controlled by " + "the [core] load_examples configuration option. This argument " + "will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) self.log.info("Filling up the DagBag from %s", dag_folder) dag_folder = dag_folder or self.dag_folder # Used to store stats around DagBag processing diff --git a/airflow-core/tests/unit/api_fastapi/conftest.py b/airflow-core/tests/unit/api_fastapi/conftest.py index aace17f8a1479..03c43a178a090 100644 --- a/airflow-core/tests/unit/api_fastapi/conftest.py +++ b/airflow-core/tests/unit/api_fastapi/conftest.py @@ -187,7 +187,8 @@ def make_dag_with_multiple_versions(dag_maker, configure_git_connection_for_dag_ def dagbag(): from airflow.models.dagbag import DBDagBag - parse_and_sync_to_db(os.devnull, include_examples=True) + with conf_vars({("core", "load_examples"): "True"}): + parse_and_sync_to_db(os.devnull) return DBDagBag() diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py 
b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py index 7c7221d353d2f..880cd69bb2676 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py @@ -198,7 +198,7 @@ def test_should_respond_200(self, test_client, session): assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -356,7 +356,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, test_client, sessi assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -420,7 +420,7 @@ def test_should_respond_200_with_task_state_in_removed(self, test_client, sessio assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -476,7 +476,7 @@ def test_should_respond_200_task_instance_with_rendered(self, test_client, sessi assert response.json() == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": 
response_data["dag_version"]["created_at"], @@ -596,7 +596,7 @@ def test_should_respond_200_mapped_task_instance_with_rtif(self, test_client, se assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2469,7 +2469,7 @@ def test_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2515,7 +2515,7 @@ def test_should_respond_200_with_different_try_numbers(self, test_client, try_nu "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2592,7 +2592,7 @@ def test_should_respond_200_with_mapped_task_at_different_try_numbers( "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2664,7 +2664,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, test_client, sessi "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + 
"bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2711,7 +2711,7 @@ def test_should_respond_200_with_task_state_in_removed(self, test_client, sessio "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -3517,7 +3517,7 @@ def test_should_respond_200_with_dag_run_id( "dag_id": "example_python_operator", "dag_display_name": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4009,7 +4009,7 @@ def test_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4046,7 +4046,7 @@ def test_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][1]["dag_version"]["created_at"], @@ -4117,7 +4117,7 @@ def test_ti_in_retry_state_not_returned(self, test_client, session): "unixname": 
getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4200,7 +4200,7 @@ def test_mapped_task_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4237,7 +4237,7 @@ def test_mapped_task_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][1]["dag_version"]["created_at"], @@ -4439,7 +4439,7 @@ def test_should_call_mocked_api(self, mock_set_ti_state, test_client, session): "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4715,7 +4715,7 @@ def test_should_raise_422_for_invalid_task_instance_state(self, payload, expecte "dag_id": "example_python_operator", "dag_display_name": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": 
"apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": mock.ANY, @@ -4853,7 +4853,7 @@ def test_update_mask_set_note_should_respond_200( "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4916,7 +4916,7 @@ def test_set_note_should_respond_200(self, test_client, session): "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4997,7 +4997,7 @@ def test_set_note_should_respond_200_mapped_task_with_rtif(self, test_client, se "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -5080,7 +5080,7 @@ def test_set_note_should_respond_200_mapped_task_summary_with_rtif(self, test_cl "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_ti["dag_version"]["created_at"], @@ -5196,7 +5196,7 @@ def test_should_call_mocked_api(self, mock_set_ti_state, test_client, session): 
"dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -5484,7 +5484,7 @@ def test_should_raise_422_for_invalid_task_instance_state(self, payload, expecte "dag_id": "example_python_operator", "dag_display_name": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": mock.ANY, diff --git a/airflow-core/tests/unit/cli/commands/test_asset_command.py b/airflow-core/tests/unit/cli/commands/test_asset_command.py index 7b17f2a5cea52..6efd8293534f3 100644 --- a/airflow-core/tests/unit/cli/commands/test_asset_command.py +++ b/airflow-core/tests/unit/cli/commands/test_asset_command.py @@ -28,6 +28,7 @@ from airflow.cli import cli_parser from airflow.cli.commands import asset_command +from tests_common.test_utils.config import conf_vars from tests_common.test_utils.db import clear_db_dags, clear_db_runs, parse_and_sync_to_db if typing.TYPE_CHECKING: @@ -38,7 +39,8 @@ @pytest.fixture(scope="module", autouse=True) def prepare_examples(): - parse_and_sync_to_db(os.devnull, include_examples=True) + with conf_vars({("core", "load_examples"): "True"}): + parse_and_sync_to_db(os.devnull) yield clear_db_runs() clear_db_dags() diff --git a/airflow-core/tests/unit/cli/commands/test_backfill_command.py b/airflow-core/tests/unit/cli/commands/test_backfill_command.py index 79d133f84c88a..8f956d331d2f5 100644 --- a/airflow-core/tests/unit/cli/commands/test_backfill_command.py +++ b/airflow-core/tests/unit/cli/commands/test_backfill_command.py @@ -30,6 +30,7 @@ from airflow.cli import cli_parser 
from airflow.models.backfill import ReprocessBehavior +from tests_common.test_utils.config import conf_vars from tests_common.test_utils.db import clear_db_backfills, clear_db_dags, clear_db_runs, parse_and_sync_to_db DEFAULT_DATE = timezone.make_aware(datetime(2015, 1, 1), timezone=timezone.utc) @@ -48,7 +49,8 @@ class TestCliBackfill: @classmethod def setup_class(cls): - parse_and_sync_to_db(os.devnull, include_examples=True) + with conf_vars({("core", "load_examples"): "True"}): + parse_and_sync_to_db(os.devnull) cls.parser = cli_parser.get_parser() @classmethod diff --git a/airflow-core/tests/unit/cli/commands/test_dag_command.py b/airflow-core/tests/unit/cli/commands/test_dag_command.py index e73f2eebbaa95..f8c4a1be6c24f 100644 --- a/airflow-core/tests/unit/cli/commands/test_dag_command.py +++ b/airflow-core/tests/unit/cli/commands/test_dag_command.py @@ -82,7 +82,7 @@ class TestCliDags: @classmethod def setup_class(cls): with conf_vars({("core", "load_examples"): "True"}): - parse_and_sync_to_db(os.devnull, include_examples=True) + parse_and_sync_to_db(os.devnull) cls.parser = cli_parser.get_parser() @classmethod diff --git a/airflow-core/tests/unit/cli/commands/test_task_command.py b/airflow-core/tests/unit/cli/commands/test_task_command.py index da5c9896c3bcf..a6504a71828ec 100644 --- a/airflow-core/tests/unit/cli/commands/test_task_command.py +++ b/airflow-core/tests/unit/cli/commands/test_task_command.py @@ -87,7 +87,8 @@ class TestCliTasks: @classmethod def setup_class(cls): - parse_and_sync_to_db(os.devnull, include_examples=True) + with conf_vars({("core", "load_examples"): "True"}): + parse_and_sync_to_db(os.devnull) cls.parser = cli_parser.get_parser() clear_db_runs() diff --git a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py index 9bae2c4acbafb..0d17069c831dd 100644 --- a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py 
+++ b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py @@ -473,6 +473,6 @@ def test_get_all_bundle_names(): # Any other bundle exposed here comes from a provider's example_dags # folder discovered via ProvidersManager. Their presence depends on # which providers are installed in the environment, so we only check - # the naming prefix instead of pinning an exact list. + # the naming suffix instead of pinning an exact list. extra = [n for n in bundle_names if n not in {"dags-folder", "example_dags"}] - assert all(n.startswith("airflow-provider-") for n in extra) + assert all(n.endswith("-example-dags") for n in extra) diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index cac2ea9d8fc0b..1c5a82c64d5fb 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -21,6 +21,7 @@ import json import logging import os +import warnings from tempfile import gettempdir from typing import TYPE_CHECKING @@ -186,6 +187,28 @@ def initial_db_init(): def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): + """ + Parse DAGs in ``folder`` and sync them to the metadata database. + + On Airflow 3.1+, ``include_examples`` is back-compat-only: example DAGs + are exposed as dedicated bundles (``example_dags`` for core, + ``apache-airflow-providers-*-example-dags`` for each provider that ships + an ``example_dags`` folder), and whether they are loaded is controlled by + the ``[core] load_examples`` configuration option, not by this argument. + Tests that need example DAGs should set ``conf_vars({("core", "load_examples"): "true"})`` + instead. Passing ``include_examples=True`` on 3.1+ emits a + :class:`DeprecationWarning`. + """ + if AIRFLOW_V_3_1_PLUS and include_examples is True: + warnings.warn( + "include_examples=True is deprecated for parse_and_sync_to_db on " + "Airflow 3.1+. 
Example DAGs are now loaded via dedicated bundles " + "controlled by the [core] load_examples configuration option. Set " + "conf_vars({('core', 'load_examples'): 'true'}) in the test instead.", + DeprecationWarning, + stacklevel=2, + ) + if AIRFLOW_V_3_2_PLUS: from airflow.dag_processing.dagbag import DagBag else: @@ -205,7 +228,7 @@ def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): except ImportError: from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] # On 3.3+, example DAGs are exposed as their own bundles - # (``example_dags`` for core, ``airflow-provider-*-example-dags`` + # (``example_dags`` for core, ``apache-airflow-providers-*-example-dags`` # for each provider that ships an ``example_dags`` folder). The # bundle loop below already syncs every one of them, so the # ``dags-folder`` DagBag must NOT pull example DAGs in too, diff --git a/providers/fab/tests/unit/fab/www/views/conftest.py b/providers/fab/tests/unit/fab/www/views/conftest.py index 3c6e047deb994..96c423947e11b 100644 --- a/providers/fab/tests/unit/fab/www/views/conftest.py +++ b/providers/fab/tests/unit/fab/www/views/conftest.py @@ -43,7 +43,8 @@ def session(): @pytest.fixture(autouse=True, scope="module") def examples_dag_bag(session): - dag_bag = parse_and_sync_to_db(os.devnull, include_examples=True) + with conf_vars({("core", "load_examples"): "True"}): + dag_bag = parse_and_sync_to_db(os.devnull) session.commit() return dag_bag From 0b18521e52c793a1946ecbfffe495858ce30845d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Mon, 4 May 2026 07:31:13 -0300 Subject: [PATCH 13/17] Remove include_examples parameter from DagBag and test helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drops the include_examples knob from DagBag.__init__, BundleDagBag.__init__, collect_dags, and parse_and_sync_to_db. 
Example DAGs now come in exclusively through the per-provider example bundles registered when [core] load_examples is enabled. Removes the deprecation warnings, updates all callers across core, devel-common, and providers, and gates the few provider tests that still need to read examples on older Airflow versions behind AIRFLOW_V_3_3_PLUS. Signed-off-by: André Ahlert --- .../newsfragments/66161.significant.rst | 25 +++-- .../src/airflow/dag_processing/dagbag.py | 37 ------- .../tests/integration/otel/test_otel.py | 2 +- .../tests/unit/always/test_example_dags.py | 3 - .../core_api/routes/public/test_backfills.py | 2 +- .../routes/public/test_dag_sources.py | 2 +- .../routes/public/test_task_instances.py | 2 +- .../unit/cli/commands/test_config_command.py | 2 - .../unit/cli/commands/test_dag_command.py | 6 +- .../unit/cli/commands/test_pool_command.py | 2 +- .../unit/cli/commands/test_team_command.py | 2 +- .../cli/commands/test_variable_command.py | 2 +- airflow-core/tests/unit/cli/conftest.py | 2 +- .../unit/core/test_impersonation_tests.py | 2 +- .../tests/unit/dag_processing/test_dagbag.py | 98 +++++++++---------- .../tests/unit/dag_processing/test_manager.py | 3 +- .../tests/unit/jobs/test_scheduler_job.py | 16 ++- airflow-core/tests/unit/models/test_dag.py | 2 +- airflow-core/tests/unit/models/test_dagrun.py | 2 +- .../serialization/test_dag_serialization.py | 6 +- .../src/tests_common/pytest_plugin.py | 4 +- .../src/tests_common/test_utils/db.py | 35 +++---- .../google/cloud/operators/test_dataproc.py | 2 +- .../google/cloud/operators/test_looker.py | 2 +- .../openlineage/plugins/test_execution.py | 2 - .../sensors/test_external_task_sensor.py | 18 ++-- .../unit/standard/sensors/test_time_delta.py | 14 ++- .../unit/standard/sensors/test_weekday.py | 12 ++- 28 files changed, 133 insertions(+), 174 deletions(-) diff --git a/airflow-core/newsfragments/66161.significant.rst b/airflow-core/newsfragments/66161.significant.rst index e0722395aa4a4..aa4db6ba9f9f7 
100644 --- a/airflow-core/newsfragments/66161.significant.rst +++ b/airflow-core/newsfragments/66161.significant.rst @@ -27,15 +27,20 @@ registered. to update to the new per-provider bundle names. The DAG identifiers themselves are unchanged. -**Deprecations:** +**Removals:** -- The ``include_examples`` parameter on ``DagBag.collect_dags`` and on - ``tests_common.test_utils.db.parse_and_sync_to_db`` is now back-compat - only. Passing ``include_examples=True`` emits a - :class:`DeprecationWarning`. Tests and external callers that need - example DAGs loaded should set the ``[core] load_examples`` config to - ``true`` (e.g. via ``conf_vars({("core", "load_examples"): "true"})``) - instead. +- The ``include_examples`` parameter has been removed from + ``DagBag.__init__``, ``DagBag.collect_dags``, ``BundleDagBag.__init__``, + and ``tests_common.test_utils.db.parse_and_sync_to_db``. Example DAG + loading is now controlled exclusively by the ``[core] load_examples`` + configuration option, which gates whether the per-provider example + bundles are registered. Callers that previously passed + ``include_examples=True`` should set + ``conf_vars({("core", "load_examples"): "true"})`` (or equivalent + configuration). Callers that previously passed + ``include_examples=False`` can drop the argument: it matches the new + default behaviour where ``DagBag`` only walks the configured + ``dag_folder`` and example DAGs come in via dedicated bundles. * Types of change @@ -55,4 +60,6 @@ registered. * Replace ``include_examples=True`` calls to ``DagBag`` / ``parse_and_sync_to_db`` with ``conf_vars({("core", "load_examples"): "true"})`` (or equivalent - configuration) before the deprecated argument is removed. + configuration). The argument has been removed. + * Drop ``include_examples=False`` arguments from ``DagBag`` / + ``parse_and_sync_to_db`` calls; the default behaviour is unchanged. 
diff --git a/airflow-core/src/airflow/dag_processing/dagbag.py b/airflow-core/src/airflow/dag_processing/dagbag.py index 75fe7e1ffe2a7..c59ae47b9f86c 100644 --- a/airflow-core/src/airflow/dag_processing/dagbag.py +++ b/airflow-core/src/airflow/dag_processing/dagbag.py @@ -172,12 +172,6 @@ class DagBag(LoggingMixin): that one system can run multiple, independent settings sets. :param dag_folder: the folder to scan to find DAGs - :param include_examples: back-compat-only on Airflow 3.1+. Example DAGs are - now exposed as dedicated bundles (``example_dags`` for core, - ``apache-airflow-providers-*-example-dags`` for each provider that - ships an ``example_dags`` folder), and the ``[core] load_examples`` - config controls whether those bundles are registered. Passing - ``include_examples=True`` here emits a :class:`DeprecationWarning`. :param safe_mode: when ``False``, scans all python modules for dags. When ``True`` uses heuristics (files containing ``DAG`` and ``airflow`` strings) to filter python modules to scan for dags. 
@@ -191,7 +185,6 @@ class DagBag(LoggingMixin): def __init__( self, dag_folder: str | Path | None = None, # todo AIP-66: rename this to path - include_examples: bool | ArgNotSet = NOTSET, safe_mode: bool | ArgNotSet = NOTSET, load_op_links: bool = True, collect_dags: bool = True, @@ -222,11 +215,6 @@ def __init__( if collect_dags: self.collect_dags( dag_folder=dag_folder, - include_examples=( - include_examples - if is_arg_set(include_examples) - else conf.getboolean("core", "LOAD_EXAMPLES") - ), safe_mode=( safe_mode if is_arg_set(safe_mode) else conf.getboolean("core", "DAG_DISCOVERY_SAFE_MODE") ), @@ -455,7 +443,6 @@ def collect_dags( self, dag_folder: str | Path | None = None, only_if_updated: bool = True, - include_examples: bool = conf.getboolean("core", "LOAD_EXAMPLES"), safe_mode: bool = conf.getboolean("core", "DAG_DISCOVERY_SAFE_MODE"), ): """ @@ -469,21 +456,7 @@ def collect_dags( **Note**: The patterns in ``.airflowignore`` are interpreted as either un-anchored regexes or gitignore-like glob expressions, depending on the ``DAG_IGNORE_FILE_SYNTAX`` configuration parameter. - - ``include_examples`` is a back-compat-only argument on Airflow 3.1+; - example DAGs are loaded via dedicated bundles gated by the - ``[core] load_examples`` configuration option. Passing - ``include_examples=True`` emits a :class:`DeprecationWarning`. """ - if include_examples is True: - warnings.warn( - "include_examples=True is deprecated for DagBag.collect_dags. " - "Example DAGs are now loaded via dedicated bundles controlled by " - "the [core] load_examples configuration option. 
This argument " - "will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) self.log.info("Filling up the DagBag from %s", dag_folder) dag_folder = dag_folder or self.dag_folder # Used to store stats around DagBag processing @@ -565,17 +538,7 @@ def __init__(self, *args, bundle_path: Path | None = None, **kwargs): if str(bundle_path) not in sys.path: sys.path.append(str(bundle_path)) - # Warn if user explicitly set include_examples=True, since bundles never contain examples - if kwargs.get("include_examples") is True: - warnings.warn( - "include_examples=True is ignored for BundleDagBag. " - "Bundles do not contain example DAGs, so include_examples is always False.", - UserWarning, - stacklevel=2, - ) - kwargs["bundle_path"] = bundle_path - kwargs["include_examples"] = False super().__init__(*args, **kwargs) diff --git a/airflow-core/tests/integration/otel/test_otel.py b/airflow-core/tests/integration/otel/test_otel.py index 1b23fe0f7549a..5119738c2065f 100644 --- a/airflow-core/tests/integration/otel/test_otel.py +++ b/airflow-core/tests/integration/otel/test_otel.py @@ -252,7 +252,7 @@ def setup_class(cls): def serialize_and_get_dags(cls) -> dict[str, SerializedDAG]: log.info("Serializing Dags from directory %s", cls.dag_folder) # Load DAGs from the dag directory. 
- dag_bag = DagBag(dag_folder=cls.dag_folder, include_examples=False) + dag_bag = DagBag(dag_folder=cls.dag_folder) dag_ids = dag_bag.dag_ids assert len(dag_ids) == 1 diff --git a/airflow-core/tests/unit/always/test_example_dags.py b/airflow-core/tests/unit/always/test_example_dags.py index 0b84f6ae26ce5..6ef8a306b5774 100644 --- a/airflow-core/tests/unit/always/test_example_dags.py +++ b/airflow-core/tests/unit/always/test_example_dags.py @@ -212,7 +212,6 @@ def patch_get_dagbag_import_timeout(): def test_should_be_importable(example: str, patch_get_dagbag_import_timeout): dagbag = DagBag( dag_folder=example, - include_examples=False, ) if len(dagbag.import_errors) == 1 and "AirflowOptionalProviderFeatureException" in str( dagbag.import_errors @@ -231,7 +230,6 @@ def test_should_not_do_database_queries(example: str, patch_get_dagbag_import_ti with assert_queries_count(1, stacklevel_from_module=example.rsplit(os.sep, 1)[-1]): DagBag( dag_folder=example, - include_examples=False, ) @@ -243,7 +241,6 @@ def test_should_not_run_hook_connections(example: str, patch_get_dagbag_import_t mock_get_connection.return_value = Connection() DagBag( dag_folder=example, - include_examples=False, ) assert mock_get_connection.call_count == 0, ( f"BaseHook.get_connection() should not be called during DAG parsing. 
" diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_backfills.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_backfills.py index 80495fc9a8d70..60882ff8a3900 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_backfills.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_backfills.py @@ -86,7 +86,7 @@ def make_dags(): with DAG(DAG3_ID, schedule=None) as dag3: # DAG start_date set to None EmptyOperator(task_id=TASK_ID, start_date=datetime(2019, 6, 12)) - dag_bag = DagBag(os.devnull, include_examples=False) + dag_bag = DagBag(os.devnull) dag_bag.dags = {dag.dag_id: dag, dag2.dag_id: dag2, dag3.dag_id: dag3} diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_sources.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_sources.py index 59970da27ea6b..fe9d523c86ad8 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_sources.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_sources.py @@ -47,7 +47,7 @@ @pytest.fixture def real_dag_bag(): - return parse_and_sync_to_db(EXAMPLE_DAG_FILE, include_examples=False) + return parse_and_sync_to_db(EXAMPLE_DAG_FILE) @pytest.fixture diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py index 880cd69bb2676..c252f3e4d8c5f 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py @@ -733,7 +733,7 @@ def create_dag_runs_with_mapped_tasks(self, dag_maker, session, dags=None): session.add(ti) DagBundlesManager().sync_bundles_to_db() - dagbag = DagBag(os.devnull, include_examples=False) + dagbag = DagBag(os.devnull) dagbag.dags = {dag_id: dag_maker.dag} sync_bag_to_db(dagbag, "dags-folder", None) 
session.flush() diff --git a/airflow-core/tests/unit/cli/commands/test_config_command.py b/airflow-core/tests/unit/cli/commands/test_config_command.py index 008554422624b..efe5ad5cf296e 100644 --- a/airflow-core/tests/unit/cli/commands/test_config_command.py +++ b/airflow-core/tests/unit/cli/commands/test_config_command.py @@ -45,7 +45,6 @@ def test_cli_show_config_should_write_data(self, mock_conf, mock_stringio): mock_conf.write.assert_called_once_with( mock_stringio.return_value.__enter__.return_value, section=None, - include_examples=False, include_descriptions=False, include_sources=False, include_env_vars=False, @@ -65,7 +64,6 @@ def test_cli_show_config_should_write_data_specific_section(self, mock_conf, moc mock_conf.write.assert_called_once_with( mock_stringio.return_value.__enter__.return_value, section="core", - include_examples=False, include_descriptions=False, include_sources=False, include_env_vars=False, diff --git a/airflow-core/tests/unit/cli/commands/test_dag_command.py b/airflow-core/tests/unit/cli/commands/test_dag_command.py index f8c4a1be6c24f..c2c3dae4b51f3 100644 --- a/airflow-core/tests/unit/cli/commands/test_dag_command.py +++ b/airflow-core/tests/unit/cli/commands/test_dag_command.py @@ -253,7 +253,7 @@ def test_next_execution(self, dag_id, delta, schedule, catchup, first, second, t print(file_content) with time_machine.travel(DEFAULT_DATE): clear_db_dags() - parse_and_sync_to_db(tmp_path, include_examples=False) + parse_and_sync_to_db(tmp_path) # Test num-executions = 1 (default) args = self.parser.parse_args(["dags", "next-execution", dag_id]) @@ -915,7 +915,7 @@ def test_dag_with_parsing_context( path_to_parse = TEST_DAGS_FOLDER / "test_dag_parsing_context.py" with configure_testing_dag_bundle(path_to_parse): - bag = DagBag(dag_folder=path_to_parse, include_examples=False) + bag = DagBag(dag_folder=path_to_parse) sync_bag_to_db(bag, "testing", None) cli_args = self.parser.parse_args( ["dags", "test", "test_dag_parsing_context", 
DEFAULT_DATE.isoformat()] @@ -1009,7 +1009,7 @@ def test_get_dag_excludes_examples_with_bundle(self, configure_testing_dag_bundl from airflow.utils.cli import get_dag as get_bagged_dag # type: ignore with configure_testing_dag_bundle(TEST_DAGS_FOLDER / "test_sensor.py"): - # example DAG should not be found since include_examples=False + # example DAG should not be found since the testing bundle only exposes test_sensor.py with pytest.raises(AirflowException, match="could not be found"): get_bagged_dag(bundle_names=["testing"], dag_id="example_simplest_dag") diff --git a/airflow-core/tests/unit/cli/commands/test_pool_command.py b/airflow-core/tests/unit/cli/commands/test_pool_command.py index 8fea33d7a7ffa..828497e9c2d38 100644 --- a/airflow-core/tests/unit/cli/commands/test_pool_command.py +++ b/airflow-core/tests/unit/cli/commands/test_pool_command.py @@ -35,7 +35,7 @@ class TestCliPools: @classmethod def setup_class(cls): - cls.dagbag = models.DagBag(include_examples=True) + cls.dagbag = models.DagBag() cls.parser = cli_parser.get_parser() settings.configure_orm() cls.session = Session diff --git a/airflow-core/tests/unit/cli/commands/test_team_command.py b/airflow-core/tests/unit/cli/commands/test_team_command.py index 55892489f8b77..a10837894a780 100644 --- a/airflow-core/tests/unit/cli/commands/test_team_command.py +++ b/airflow-core/tests/unit/cli/commands/test_team_command.py @@ -52,7 +52,7 @@ def _cleanup(cls): @classmethod def setup_class(cls): - cls.dagbag = models.DagBag(include_examples=True) + cls.dagbag = models.DagBag() cls.parser = cli_parser.get_parser() settings.configure_orm() cls.session = Session diff --git a/airflow-core/tests/unit/cli/commands/test_variable_command.py b/airflow-core/tests/unit/cli/commands/test_variable_command.py index 21d2fb66822b5..e9f4b94f30840 100644 --- a/airflow-core/tests/unit/cli/commands/test_variable_command.py +++ b/airflow-core/tests/unit/cli/commands/test_variable_command.py @@ -120,7 +120,7 @@ def _create(data, 
format="yaml", filename=None): class TestCliVariables: @classmethod def setup_class(cls): - cls.dagbag = models.DagBag(include_examples=True) + cls.dagbag = models.DagBag() cls.parser = cli_parser.get_parser() def setup_method(self): diff --git a/airflow-core/tests/unit/cli/conftest.py b/airflow-core/tests/unit/cli/conftest.py index 2967e48cd6c1b..b9be97bfc9f42 100644 --- a/airflow-core/tests/unit/cli/conftest.py +++ b/airflow-core/tests/unit/cli/conftest.py @@ -56,7 +56,7 @@ def load_examples(): @pytest.fixture(scope="session") def dagbag(): - return DagBag(include_examples=True) + return DagBag() @pytest.fixture(scope="session") diff --git a/airflow-core/tests/unit/core/test_impersonation_tests.py b/airflow-core/tests/unit/core/test_impersonation_tests.py index 8165d1f6d73f6..7325cb3ef5ba2 100644 --- a/airflow-core/tests/unit/core/test_impersonation_tests.py +++ b/airflow-core/tests/unit/core/test_impersonation_tests.py @@ -167,7 +167,7 @@ def setup_impersonation_tests(self, create_airflow_home): @staticmethod def get_dagbag(dag_folder): """Get DagBag and print statistic into the log.""" - dagbag = DagBag(dag_folder=dag_folder, include_examples=False) + dagbag = DagBag(dag_folder=dag_folder) logger.info("Loaded DAGs:") logger.info(dagbag.dagbag_report()) return dagbag diff --git a/airflow-core/tests/unit/dag_processing/test_dagbag.py b/airflow-core/tests/unit/dag_processing/test_dagbag.py index 49fa6f8ebe950..99abd92a59f73 100644 --- a/airflow-core/tests/unit/dag_processing/test_dagbag.py +++ b/airflow-core/tests/unit/dag_processing/test_dagbag.py @@ -353,20 +353,18 @@ def teardown_class(self): def test_dagbag_with_bundle_name(self, tmp_path): """Test that DagBag constructor accepts and stores bundle_name parameter.""" - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False, bundle_name="test_bundle") + dagbag = DagBag(dag_folder=os.fspath(tmp_path), bundle_name="test_bundle") assert dagbag.bundle_name == "test_bundle" # Test with None (default) 
- dagbag2 = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag2 = DagBag(dag_folder=os.fspath(tmp_path)) assert dagbag2.bundle_name is None def test_get_existing_dag(self, tmp_path, standard_example_dags_folder): """ Test that we're able to parse some example DAGs and retrieve them """ - dagbag = DagBag( - dag_folder=standard_example_dags_folder, include_examples=False, bundle_name="test_bundle" - ) + dagbag = DagBag(dag_folder=standard_example_dags_folder, bundle_name="test_bundle") some_expected_dag_ids = ["example_bash_operator", "example_python_operator"] @@ -382,7 +380,7 @@ def test_get_non_existing_dag(self, tmp_path): """ test that retrieving a non existing dag id returns None without crashing """ - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) non_existing_dag_id = "non_existing_dag_id" assert dagbag.get_dag(non_existing_dag_id) is None @@ -398,7 +396,7 @@ def test_dont_load_example(self, tmp_path): """ test that the example are not loaded """ - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert dagbag.size() == 0 @@ -411,7 +409,7 @@ def test_safe_mode_heuristic_match(self, tmp_path): path.write_text("# airflow\n# DAG") with conf_vars({("core", "dags_folder"): os.fspath(path.parent)}): - dagbag = DagBag(include_examples=False, safe_mode=True) + dagbag = DagBag(safe_mode=True) assert len(dagbag.dagbag_stats) == 1 assert dagbag.dagbag_stats[0].file == path.name @@ -424,7 +422,7 @@ def test_safe_mode_heuristic_mismatch(self, tmp_path): path = tmp_path / "testfile.py" path.write_text("") with conf_vars({("core", "dags_folder"): os.fspath(path.parent)}): - dagbag = DagBag(include_examples=False, safe_mode=True) + dagbag = DagBag(safe_mode=True) assert len(dagbag.dagbag_stats) == 0 def test_safe_mode_disabled(self, tmp_path): @@ -432,7 +430,7 @@ def test_safe_mode_disabled(self, tmp_path): 
path = tmp_path / "testfile.py" path.write_text("") with conf_vars({("core", "dags_folder"): os.fspath(path.parent)}): - dagbag = DagBag(include_examples=False, safe_mode=False) + dagbag = DagBag(safe_mode=False) assert len(dagbag.dagbag_stats) == 1 assert dagbag.dagbag_stats[0].file == path.name @@ -454,7 +452,7 @@ def test_dagbag_stats_file_is_relative_path_with_mixed_separators(self, tmp_path # but the filesystem returns paths with backslashes dags_folder_with_forward_slashes = path.parent.as_posix() with conf_vars({("core", "dags_folder"): dags_folder_with_forward_slashes}): - dagbag = DagBag(include_examples=False, safe_mode=True) + dagbag = DagBag(safe_mode=True) assert len(dagbag.dagbag_stats) == 1 assert dagbag.dagbag_stats[0].file == path.name @@ -470,7 +468,6 @@ def test_dagbag_stats_includes_bundle_info(self, tmp_path): with conf_vars({("core", "dags_folder"): os.fspath(path.parent)}): dagbag = DagBag( - include_examples=False, safe_mode=True, bundle_path=bundle_path, bundle_name=bundle_name, @@ -487,7 +484,7 @@ def test_dagbag_stats_bundle_info_none_when_not_provided(self, tmp_path): path.write_text("# airflow\n# DAG") with conf_vars({("core", "dags_folder"): os.fspath(path.parent)}): - dagbag = DagBag(include_examples=False, safe_mode=True) + dagbag = DagBag(safe_mode=True) assert len(dagbag.dagbag_stats) == 1 stat = dagbag.dagbag_stats[0] @@ -501,12 +498,12 @@ def test_process_file_that_contains_multi_bytes_char(self, tmp_path): path = tmp_path / "testfile.py" path.write_text("\u3042") # write multi-byte char (hiragana) - dagbag = DagBag(dag_folder=os.fspath(path.parent), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(path.parent)) assert dagbag.process_file(os.fspath(path)) == [] def test_process_file_duplicated_dag_id(self, tmp_path): """Loading a DAG with ID that already existed in a DAG bag should result in an import error.""" - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = 
DagBag(dag_folder=os.fspath(tmp_path)) def create_dag(): from airflow.sdk import dag @@ -547,7 +544,6 @@ def test_import_errors_use_relative_path_with_bundle(self, tmp_path): dagbag = DagBag( dag_folder=os.fspath(dag_path), - include_examples=False, bundle_path=bundle_path, bundle_name="test_bundle", ) @@ -580,7 +576,6 @@ def my_flow(): dagbag = DagBag( dag_folder=os.fspath(bundle_path), - include_examples=False, bundle_path=bundle_path, bundle_name="test_bundle", ) @@ -606,7 +601,7 @@ def test_zip_skip_log(self, caplog, test_zip_path): it doesn't have "airflow" and "DAG" """ caplog.set_level(logging.INFO) - dagbag = DagBag(dag_folder=test_zip_path, include_examples=False) + dagbag = DagBag(dag_folder=test_zip_path) assert dagbag.has_logged assert ( @@ -619,7 +614,7 @@ def test_zip(self, tmp_path, test_zip_path): test the loading of a DAG within a zip file that includes dependencies """ syspath_before = deepcopy(sys.path) - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) dagbag.process_file(test_zip_path) assert dagbag.get_dag("test_zip_dag") assert sys.path == syspath_before # sys.path doesn't change @@ -635,12 +630,12 @@ def test_process_dag_file_without_timeout( """ mocked_get_dagbag_import_timeout.return_value = 0 - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) dagbag.process_file(os.path.join(TEST_DAGS_FOLDER, "test_sensor.py")) mocked_timeout.assert_not_called() mocked_get_dagbag_import_timeout.return_value = -1 - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) dagbag.process_file(os.path.join(TEST_DAGS_FOLDER, "test_sensor.py")) mocked_timeout.assert_not_called() @@ -658,7 +653,7 @@ def test_process_dag_file_with_non_default_timeout( # ensure the test value is not equal to the default value assert timeout_value != 
settings.conf.getfloat("core", "DAGBAG_IMPORT_TIMEOUT") - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) dagbag.process_file(os.path.join(TEST_DAGS_FOLDER, "test_sensor.py")) mocked_timeout.assert_called_once_with(timeout_value, error_message=mock.ANY) @@ -672,7 +667,7 @@ def test_check_value_type_from_get_dagbag_import_timeout( """ mocked_get_dagbag_import_timeout.return_value = "1" - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) with pytest.raises( TypeError, match=r"Value \(1\) from get_dagbag_import_timeout must be int or float" ): @@ -694,7 +689,7 @@ def test_process_file_cron_validity_check( self, request: pytest.FixtureRequest, invalid_dag_name: str, tmp_path ): """Test if an invalid cron expression as schedule interval can be identified""" - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert len(dagbag.import_errors) == 0 dagbag.process_file(request.getfixturevalue(invalid_dag_name)) assert len(dagbag.import_errors) == 1 @@ -710,7 +705,7 @@ def test_process_file_invalid_param_check(self, tmp_path): "test_invalid_param3.py", "test_invalid_param4.py", ] - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert len(dagbag.import_errors) == 0 for file in invalid_dag_files: @@ -726,7 +721,7 @@ def test_process_file_valid_param_check(self, tmp_path): "test_valid_param.py", "test_valid_param2.py", ] - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert len(dagbag.import_errors) == 0 for file in valid_dag_files: @@ -776,7 +771,7 @@ def test_get_dag_registration(self, file_name, expected_dag_id, standard_example pytest.importorskip("system.standard") file_to_load = standard_example_dags_folder 
/ file_name expected_path = standard_example_dags_folder.relative_to(AIRFLOW_ROOT_PATH) / file_name - dagbag = DagBag(dag_folder=os.devnull, include_examples=False) + dagbag = DagBag(dag_folder=os.devnull) dagbag.process_file(os.fspath(file_to_load)) dag = dagbag.get_dag(expected_dag_id) assert dag, f"{expected_dag_id} was bagged" @@ -795,7 +790,7 @@ def test_get_dag_registration(self, file_name, expected_dag_id, standard_example ), ) def test_get_zip_dag_registration(self, test_zip_path, expected): - dagbag = DagBag(dag_folder=os.devnull, include_examples=False) + dagbag = DagBag(dag_folder=os.devnull) dagbag.process_file(test_zip_path) for dag_id, path in expected.items(): dag = dagbag.get_dag(dag_id) @@ -803,7 +798,7 @@ def test_get_zip_dag_registration(self, test_zip_path, expected): assert dag.fileloc.endswith(f"{pathlib.Path(test_zip_path).parent}/{path}") def test_dag_registration_with_failure(self): - dagbag = DagBag(dag_folder=os.devnull, include_examples=False) + dagbag = DagBag(dag_folder=os.devnull) found = dagbag.process_file(str(TEST_DAGS_FOLDER / "test_invalid_dup_task.py")) assert found == [] @@ -818,7 +813,7 @@ def zip_with_valid_dag_and_dup_tasks(self, tmp_path: pathlib.Path) -> str: return os.fspath(zipped) def test_dag_registration_with_failure_zipped(self, zip_with_valid_dag_and_dup_tasks): - dagbag = DagBag(dag_folder=os.devnull, include_examples=False) + dagbag = DagBag(dag_folder=os.devnull) found = dagbag.process_file(zip_with_valid_dag_and_dup_tasks) assert len(found) == 1 assert [dag.dag_id for dag in found] == ["test_example_bash_operator"] @@ -843,7 +838,7 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): _TestDagBag.process_file_calls += 1 return super().process_file(filepath, only_if_updated, safe_mode) - dagbag = _TestDagBag(dag_folder=standard_example_dags_folder, include_examples=False) + dagbag = _TestDagBag(dag_folder=standard_example_dags_folder) assert dagbag.process_file_calls == 1 dag = 
dagbag.get_dag(dag_id) @@ -871,7 +866,7 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): _TestDagBag.process_file_calls += 1 return super().process_file(filepath, only_if_updated, safe_mode) - dagbag = _TestDagBag(dag_folder=os.path.realpath(test_zip_path), include_examples=False) + dagbag = _TestDagBag(dag_folder=os.path.realpath(test_zip_path)) assert dagbag.process_file_calls == 1 dag = dagbag.get_dag(dag_id) @@ -888,7 +883,7 @@ def process_dag(self, create_dag, tmp_path): path = tmp_path / "testfile.py" path.write_text(source) - dagbag = DagBag(dag_folder=os.fspath(path.parent), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(path.parent)) found_dags = dagbag.process_file(os.fspath(path)) return dagbag, found_dags, os.fspath(path) @@ -943,7 +938,7 @@ def test_process_file_with_none(self, tmp_path): """ test that process_file can handle Nones """ - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert dagbag.process_file(None) == [] @@ -969,7 +964,7 @@ def test_timeout_dag_errors_are_import_errors(self, tmp_path, caplog): """) with conf_vars({("core", "DAGBAG_IMPORT_TIMEOUT"): "0.01"}): - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert dag_file.as_posix() in dagbag.import_errors assert "DagBag import timeout for" in caplog.text @@ -999,7 +994,7 @@ def test_import_error_tracebacks(self, tmp_path, depth): with contextlib.ExitStack() as cm: if depth is not None: cm.enter_context(conf_vars({("core", "dagbag_import_error_traceback_depth"): str(depth)})) - dagbag = DagBag(dag_folder=unparseable_filename, include_examples=False) + dagbag = DagBag(dag_folder=unparseable_filename) import_errors = dagbag.import_errors assert unparseable_filename in import_errors @@ -1015,7 +1010,7 @@ def test_import_error_tracebacks_zip(self, tmp_path, depth): with contextlib.ExitStack() as cm: 
if depth is not None: cm.enter_context(conf_vars({("core", "dagbag_import_error_traceback_depth"): str(depth)})) - dagbag = DagBag(dag_folder=invalid_zip_filename, include_examples=False) + dagbag = DagBag(dag_folder=invalid_zip_filename) import_errors = dagbag.import_errors assert invalid_dag_filename in import_errors assert import_errors[invalid_dag_filename] == self._make_test_traceback(invalid_dag_filename, depth) @@ -1030,7 +1025,7 @@ def test_task_cluster_policy_violation(self): dag_id = "test_missing_owner" err_cls_name = "AirflowClusterPolicyViolation" - dagbag = DagBag(dag_folder=dag_file, include_examples=False) + dagbag = DagBag(dag_folder=dag_file) assert set() == set(dagbag.dag_ids) expected_import_errors = { dag_file: ( @@ -1052,7 +1047,7 @@ def test_task_cluster_policy_nonstring_owner(self): dag_id = "test_nonstring_owner" err_cls_name = "AirflowClusterPolicyViolation" - dagbag = DagBag(dag_folder=dag_file, include_examples=False) + dagbag = DagBag(dag_folder=dag_file) assert set() == set(dagbag.dag_ids) expected_import_errors = { dag_file: ( @@ -1071,7 +1066,7 @@ def test_task_cluster_policy_obeyed(self): """ dag_file = os.path.join(TEST_DAGS_FOLDER, "test_with_non_default_owner.py") - dagbag = DagBag(dag_folder=dag_file, include_examples=False) + dagbag = DagBag(dag_folder=dag_file) assert {"test_with_non_default_owner"} == set(dagbag.dag_ids) assert dagbag.import_errors == {} @@ -1080,14 +1075,13 @@ def test_task_cluster_policy_obeyed(self): def test_dag_cluster_policy_obeyed(self): dag_file = os.path.join(TEST_DAGS_FOLDER, "test_dag_with_no_tags.py") - dagbag = DagBag(dag_folder=dag_file, include_examples=False) + dagbag = DagBag(dag_folder=dag_file) assert len(dagbag.dag_ids) == 0 assert "has no tags" in dagbag.import_errors[dag_file] def test_dagbag_dag_collection(self): dagbag = DagBag( dag_folder=TEST_DAGS_FOLDER, - include_examples=False, collect_dags=False, bundle_name="test_collection", ) @@ -1098,15 +1092,15 @@ def 
test_dagbag_dag_collection(self): assert dagbag.dags # test that dagbag.dags is not empty if collect_dags is True - dagbag = DagBag(dag_folder=TEST_DAGS_FOLDER, include_examples=False, bundle_name="test_collection") + dagbag = DagBag(dag_folder=TEST_DAGS_FOLDER, bundle_name="test_collection") assert dagbag.dags def test_dabgag_captured_warnings(self): dag_file = os.path.join(TEST_DAGS_FOLDER, "test_dag_warnings.py") - dagbag = DagBag(dag_folder=dag_file, include_examples=False, collect_dags=False) + dagbag = DagBag(dag_folder=dag_file, collect_dags=False) assert dag_file not in dagbag.captured_warnings - dagbag.collect_dags(dag_folder=dagbag.dag_folder, include_examples=False, only_if_updated=False) + dagbag.collect_dags(dag_folder=dagbag.dag_folder, only_if_updated=False) assert dagbag.dagbag_stats[0].warning_num == 2 assert dagbag.captured_warnings == { dag_file: ( @@ -1118,14 +1112,14 @@ def test_dabgag_captured_warnings(self): with warnings.catch_warnings(): # Disable capture DeprecationWarning, and it should be reflected in captured warnings warnings.simplefilter("ignore", DeprecationWarning) - dagbag.collect_dags(dag_folder=dagbag.dag_folder, include_examples=False, only_if_updated=False) + dagbag.collect_dags(dag_folder=dagbag.dag_folder, only_if_updated=False) assert dag_file in dagbag.captured_warnings assert len(dagbag.captured_warnings[dag_file]) == 1 assert dagbag.dagbag_stats[0].warning_num == 1 # Disable all warnings, no captured warnings expected warnings.simplefilter("ignore") - dagbag.collect_dags(dag_folder=dagbag.dag_folder, include_examples=False, only_if_updated=False) + dagbag.collect_dags(dag_folder=dagbag.dag_folder, only_if_updated=False) assert dag_file not in dagbag.captured_warnings assert dagbag.dagbag_stats[0].warning_num == 0 @@ -1139,7 +1133,7 @@ def warning_zipped_dag_path(self, tmp_path: pathlib.Path) -> str: def test_dabgag_captured_warnings_zip(self, warning_zipped_dag_path: str): in_zip_dag_file = 
f"{warning_zipped_dag_path}/test_dag_warnings.py" - dagbag = DagBag(dag_folder=warning_zipped_dag_path, include_examples=False) + dagbag = DagBag(dag_folder=warning_zipped_dag_path) assert dagbag.dagbag_stats[0].warning_num == 2 assert dagbag.captured_warnings == { warning_zipped_dag_path: ( @@ -1175,7 +1169,7 @@ def test_dag_warnings_invalid_pool(self, known_pools, expected): BaseOperator(task_id="1") BaseOperator(task_id="2", pool="pool1") - dagbag = DagBag(dag_folder="", include_examples=False, collect_dags=False, known_pools=known_pools) + dagbag = DagBag(dag_folder="", collect_dags=False, known_pools=known_pools) dagbag.bag_dag(dag) assert dagbag.dag_warnings == expected @@ -1204,7 +1198,7 @@ def mytask(): ) ) - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert "Received SIGSEGV signal while processing" in caplog.text assert dag_file.as_posix() in dagbag.import_errors @@ -1229,7 +1223,7 @@ def mytask(): ) with mock.patch("airflow.dag_processing.importers.python_importer.signal.signal") as mock_signal: mock_signal.side_effect = ValueError("Invalid signal setting") - DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + DagBag(dag_folder=os.fspath(tmp_path)) assert "SIGSEGV signal handler registration failed. 
Not in the main thread" in caplog.text @@ -1375,7 +1369,7 @@ def test_dagbag_no_bundle_path_no_syspath_modification(self, tmp_path): ) ) syspath_before = deepcopy(sys.path) - dagbag = DagBag(dag_folder=str(dag_file), include_examples=False) + dagbag = DagBag(dag_folder=str(dag_file)) dag = dagbag.get_dag("simple_dag") assert str(tmp_path) not in dag.description diff --git a/airflow-core/tests/unit/dag_processing/test_manager.py b/airflow-core/tests/unit/dag_processing/test_manager.py index 5b6ff6af014ea..2c24012478e6c 100644 --- a/airflow-core/tests/unit/dag_processing/test_manager.py +++ b/airflow-core/tests/unit/dag_processing/test_manager.py @@ -754,7 +754,6 @@ def test_scan_stale_dags(self, session): ) dagbag = DagBag( test_dag_path.absolute_path, - include_examples=False, bundle_path=test_dag_path.bundle_path, ) @@ -1114,7 +1113,7 @@ def test_refresh_dags_dir_doesnt_delete_zipped_dags( self, tmp_path, session, configure_testing_dag_bundle, test_zip_path ): """Test DagFileProcessorManager._refresh_dag_dir method""" - dagbag = DagBag(dag_folder=tmp_path, include_examples=False) + dagbag = DagBag(dag_folder=tmp_path) dagbag.process_file(test_zip_path) dag = dagbag.get_dag("test_zip_dag") sync_dag_to_db(dag) diff --git a/airflow-core/tests/unit/jobs/test_scheduler_job.py b/airflow-core/tests/unit/jobs/test_scheduler_job.py index 061a87e3aa420..3df577541c5fd 100644 --- a/airflow-core/tests/unit/jobs/test_scheduler_job.py +++ b/airflow-core/tests/unit/jobs/test_scheduler_job.py @@ -3813,7 +3813,7 @@ def test_dagrun_root_after_dagrun_unfinished(self, mock_executor, testing_dag_bu Noted: the DagRun state could be still in running state during CI. 
""" - dagbag = DagBag(TEST_DAG_FOLDER, include_examples=False) + dagbag = DagBag(TEST_DAG_FOLDER) sync_bag_to_db(dagbag, "testing", None) dag_id = "test_dagrun_states_root_future" @@ -3831,7 +3831,7 @@ def test_scheduler_start_date(self, testing_dag_bundle): """ Test that the scheduler respects start_dates, even when DAGs have run """ - dagbag = DagBag(TEST_DAG_FOLDER, include_examples=False) + dagbag = DagBag(TEST_DAG_FOLDER) with create_session() as session: dag_id = "test_start_date_scheduling" dag = dagbag.get_dag(dag_id) @@ -3888,7 +3888,6 @@ def test_scheduler_task_start_date_catchup_true(self, testing_dag_bundle): """ dagbag = DagBag( dag_folder=os.path.join(settings.DAGS_FOLDER, "test_scheduler_dags.py"), - include_examples=False, ) dag_id = "test_task_start_date_scheduling" dag = dagbag.get_dag(dag_id) @@ -3929,7 +3928,6 @@ def test_scheduler_task_start_date_catchup_false(self, testing_dag_bundle): """ dagbag = DagBag( dag_folder=os.path.join(settings.DAGS_FOLDER, "test_scheduler_dags.py"), - include_examples=False, ) dag_id = "test_task_start_date_scheduling" dag = dagbag.get_dag(dag_id) @@ -3973,7 +3971,7 @@ def test_scheduler_multiprocessing(self): """ Test that the scheduler can successfully queue multiple dags in parallel """ - dagbag = DagBag(TEST_DAG_FOLDER, include_examples=False) + dagbag = DagBag(TEST_DAG_FOLDER) dag_ids = [ "test_start_date_scheduling", "test_task_start_date_scheduling", @@ -7120,7 +7118,7 @@ def watch_heartbeat(*args, **kwargs): def test_mapped_dag(self, dag_id, session, testing_dag_bundle): """End-to-end test of a simple mapped dag""" - dagbag = DagBag(dag_folder=TEST_DAGS_FOLDER, include_examples=False) + dagbag = DagBag(dag_folder=TEST_DAGS_FOLDER) sync_bag_to_db(dagbag, "testing", None) dagbag.process_file(str(TEST_DAGS_FOLDER / f"{dag_id}.py")) dag = dagbag.get_dag(dag_id) @@ -7153,7 +7151,7 @@ def test_should_mark_empty_task_as_success(self, testing_dag_bundle): dag_file = Path(__file__).parents[1] / 
"dags/test_only_empty_tasks.py" # Write DAGs to dag and serialized_dag table - dagbag = DagBag(dag_folder=dag_file, include_examples=False) + dagbag = DagBag(dag_folder=dag_file) sync_bag_to_db(dagbag, "testing", None) scheduler_job = Job() @@ -8742,7 +8740,7 @@ def test_execute_queries_count_with_harvested_dags( ), ): dagruns = [] - dagbag = DagBag(dag_folder=ELASTIC_DAG_FILE, include_examples=False) + dagbag = DagBag(dag_folder=ELASTIC_DAG_FILE) sync_bag_to_db(dagbag, "testing", None) for i, dag in enumerate(dagbag.dags.values()): @@ -8834,7 +8832,7 @@ def test_process_dags_queries_count( } ), ): - dagbag = DagBag(dag_folder=ELASTIC_DAG_FILE, include_examples=False) + dagbag = DagBag(dag_folder=ELASTIC_DAG_FILE) sync_bag_to_db(dagbag, "testing", None) scheduler_job = Job(job_type=SchedulerJobRunner.job_type) diff --git a/airflow-core/tests/unit/models/test_dag.py b/airflow-core/tests/unit/models/test_dag.py index 0705c9420bd2c..148d9e2165f37 100644 --- a/airflow-core/tests/unit/models/test_dag.py +++ b/airflow-core/tests/unit/models/test_dag.py @@ -207,7 +207,7 @@ def test_dag_test_auto_parses_when_not_serialized(self, test_dags_bundle, sessio dag_id = "test_example_bash_operator" - dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER)) dag = dagbag.dags.get(dag_id) # Ensure not serialized yet diff --git a/airflow-core/tests/unit/models/test_dagrun.py b/airflow-core/tests/unit/models/test_dagrun.py index dd34c2d10e7ea..b79e692b5213d 100644 --- a/airflow-core/tests/unit/models/test_dagrun.py +++ b/airflow-core/tests/unit/models/test_dagrun.py @@ -97,7 +97,7 @@ async def empty_callback_for_deadline(): def dagbag(): from airflow.dag_processing.dagbag import DagBag - return DagBag(include_examples=True) + return DagBag() @pytest.fixture diff --git a/airflow-core/tests/unit/serialization/test_dag_serialization.py b/airflow-core/tests/unit/serialization/test_dag_serialization.py index 
375b13dea3561..abdb9c55d4d27 100644 --- a/airflow-core/tests/unit/serialization/test_dag_serialization.py +++ b/airflow-core/tests/unit/serialization/test_dag_serialization.py @@ -472,7 +472,7 @@ def collect_dags(dag_folder=None): for directory in glob(f"{AIRFLOW_REPO_ROOT_PATH}/{pattern}"): if any([directory.startswith(excluded_pattern) for excluded_pattern in excluded_patterns]): continue - dagbag = DagBag(directory, include_examples=False) + dagbag = DagBag(directory) dags.update(dagbag.dags) import_errors.update(dagbag.import_errors) return dags, import_errors @@ -1821,9 +1821,7 @@ def mytask(): @pytest.mark.db_test def test_basic_mapped_dag(self, dag_maker): - dagbag = DagBag( - "airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py", include_examples=False - ) + dagbag = DagBag("airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py") assert not dagbag.import_errors dag = dagbag.dags["example_dynamic_task_mapping"] ser_dag = DagSerialization.to_dict(dag) diff --git a/devel-common/src/tests_common/pytest_plugin.py b/devel-common/src/tests_common/pytest_plugin.py index 98a871ebb12d2..89cceed23fc03 100644 --- a/devel-common/src/tests_common/pytest_plugin.py +++ b/devel-common/src/tests_common/pytest_plugin.py @@ -932,7 +932,7 @@ def __init__(self): from airflow.models import DagBag # Keep all the serialized dags we've created in this test - self.dagbag = DagBag(os.devnull, include_examples=False) + self.dagbag = DagBag(os.devnull) def __enter__(self): self.serialized_model = None @@ -1740,7 +1740,7 @@ def _get(dag_id: str): from airflow.models.dagbag import DagBag # type: ignore[no-redef, attribute-defined] dag_file = AIRFLOW_CORE_TESTS_PATH / "unit" / "dags" / f"{dag_id}.py" - dagbag = DagBag(dag_folder=dag_file, include_examples=False) + dagbag = DagBag(dag_folder=dag_file) dag = dagbag.get_dag(dag_id) diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index 
1c5a82c64d5fb..65b3b7a890333 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -21,7 +21,6 @@ import json import logging import os -import warnings from tempfile import gettempdir from typing import TYPE_CHECKING @@ -186,29 +185,17 @@ def initial_db_init(): _bootstrap_dagbag() -def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): +def parse_and_sync_to_db(folder: Path | str): """ Parse DAGs in ``folder`` and sync them to the metadata database. - On Airflow 3.1+, ``include_examples`` is back-compat-only: example DAGs - are exposed as dedicated bundles (``example_dags`` for core, - ``apache-airflow-providers-*-example-dags`` for each provider that ships - an ``example_dags`` folder), and whether they are loaded is controlled by - the ``[core] load_examples`` configuration option, not by this argument. - Tests that need example DAGs should set ``conf_vars({("core", "load_examples"): "true"})`` - instead. Passing ``include_examples=True`` on 3.1+ emits a - :class:`DeprecationWarning`. + On Airflow 3.3+, example DAGs are exposed as dedicated bundles + (``example_dags`` for core, ``apache-airflow-providers-*-example-dags`` + for each provider that ships an ``example_dags`` folder), and whether + they are loaded is controlled by the ``[core] load_examples`` + configuration option. Tests that need example DAGs should set + ``conf_vars({("core", "load_examples"): "true"})``. """ - if AIRFLOW_V_3_1_PLUS and include_examples is True: - warnings.warn( - "include_examples=True is deprecated for parse_and_sync_to_db on " - "Airflow 3.1+. Example DAGs are now loaded via dedicated bundles " - "controlled by the [core] load_examples configuration option. 
Set " - "conf_vars({('core', 'load_examples'): 'true'}) in the test instead.", - DeprecationWarning, - stacklevel=2, - ) - if AIRFLOW_V_3_2_PLUS: from airflow.dag_processing.dagbag import DagBag else: @@ -235,17 +222,17 @@ def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): # otherwise the same DAG gets registered under two bundles and # ``dag_schedule_asset_reference`` rows collide on the unique # ``(asset_id, dag_id)`` constraint. - dagbag = DagBag(dag_folder=folder, include_examples=False) + dagbag = DagBag(dag_folder=folder) sync_bag_to_db(dagbag, "dags-folder", None, session=session) for bundle in DagBundlesManager().get_all_dag_bundles(): - bundle_dagbag = DagBag(dag_folder=bundle.path, include_examples=False) + bundle_dagbag = DagBag(dag_folder=bundle.path) sync_bag_to_db(bundle_dagbag, bundle.name, None, session=session) elif AIRFLOW_V_3_0_PLUS: - dagbag = DagBag(dag_folder=folder, include_examples=include_examples) + dagbag = DagBag(dag_folder=folder, include_examples=False) dagbag.sync_to_db("dags-folder", None, session) # type: ignore[attr-defined] else: - dagbag = DagBag(dag_folder=folder, include_examples=include_examples) + dagbag = DagBag(dag_folder=folder, include_examples=False) dagbag.sync_to_db(session=session) # type: ignore[attr-defined] return dagbag diff --git a/providers/google/tests/unit/google/cloud/operators/test_dataproc.py b/providers/google/tests/unit/google/cloud/operators/test_dataproc.py index 20e622ac3560c..7d1cf407053be 100644 --- a/providers/google/tests/unit/google/cloud/operators/test_dataproc.py +++ b/providers/google/tests/unit/google/cloud/operators/test_dataproc.py @@ -459,7 +459,7 @@ def assert_warning(msg: str, warnings): class DataprocTestBase: @classmethod def setup_class(cls): - cls.dagbag = DagBag(dag_folder="/dev/null", include_examples=False) + cls.dagbag = DagBag(dag_folder="/dev/null") cls.dag = DAG( dag_id=TEST_DAG_ID, schedule=None, diff --git 
a/providers/google/tests/unit/google/cloud/operators/test_looker.py b/providers/google/tests/unit/google/cloud/operators/test_looker.py index 91f2387a64aa4..2cc2c5ac0513d 100644 --- a/providers/google/tests/unit/google/cloud/operators/test_looker.py +++ b/providers/google/tests/unit/google/cloud/operators/test_looker.py @@ -43,7 +43,7 @@ class LookerTestBase: @classmethod def setUpClass(cls): - cls.dagbag = DagBag(dag_folder="/dev/null", include_examples=False) + cls.dagbag = DagBag(dag_folder="/dev/null") cls.dag = DAG(TEST_DAG_ID, default_args={"owner": "airflow", "start_date": DEFAULT_DATE}) def setup_method(self): diff --git a/providers/openlineage/tests/unit/openlineage/plugins/test_execution.py b/providers/openlineage/tests/unit/openlineage/plugins/test_execution.py index ffb32b6ff1d37..256e873f55e47 100644 --- a/providers/openlineage/tests/unit/openlineage/plugins/test_execution.py +++ b/providers/openlineage/tests/unit/openlineage/plugins/test_execution.py @@ -86,7 +86,6 @@ def setup_job(self, task_name, run_id, listener_manager): dagbag = DagBag( dag_folder=TEST_DAG_FOLDER, - include_examples=False, ) dag = dagbag.dags.get("test_openlineage_execution") task = dag.get_task(task_name) @@ -189,7 +188,6 @@ def test_success_overtime_kills_tasks(self, listener_manager): dagbag = DagBag( dag_folder=TEST_DAG_FOLDER, - include_examples=False, ) dag = dagbag.dags.get("test_openlineage_execution") task = dag.get_task("execute_long_stall") diff --git a/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py b/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py index fde38e4e2367a..afaed86fc6753 100644 --- a/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py +++ b/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py @@ -62,7 +62,11 @@ from tests_common.test_utils.dag import create_scheduler_dag, sync_dag_to_db, sync_dags_to_db from tests_common.test_utils.db import clear_db_runs 
from tests_common.test_utils.mock_operators import MockOperator -from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_1_PLUS, AIRFLOW_V_3_2_PLUS +from tests_common.test_utils.version_compat import ( + AIRFLOW_V_3_0_PLUS, + AIRFLOW_V_3_1_PLUS, + AIRFLOW_V_3_2_PLUS, +) if AIRFLOW_V_3_0_PLUS: from airflow.models.dag_version import DagVersion @@ -1721,7 +1725,7 @@ def dag_bag_ext(): """ clear_db_runs() - dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) + dag_bag = DagBag(dag_folder=DEV_NULL) dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule=None) task_a_0 = EmptyOperator(task_id="task_a_0", dag=dag_0) @@ -1785,7 +1789,7 @@ def dag_bag_parent_child(): """ clear_db_runs() - dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) + dag_bag = DagBag(dag_folder=DEV_NULL) day_1 = DEFAULT_DATE @@ -2020,7 +2024,7 @@ def dag_bag_cyclic(): """ def _factory(depth: int) -> DagBag: - dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) + dag_bag = DagBag(dag_folder=DEV_NULL) dags = [] @@ -2118,7 +2122,7 @@ def dag_bag_multiple(session): """ Create a DagBag containing two DAGs, linked by multiple ExternalTaskMarker. 
""" - dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) + dag_bag = DagBag(dag_folder=DEV_NULL) daily_dag = DAG("daily_dag", start_date=DEFAULT_DATE, schedule="@daily") agg_dag = DAG("agg_dag", start_date=DEFAULT_DATE, schedule="@daily") if AIRFLOW_V_3_0_PLUS: @@ -2164,7 +2168,7 @@ def dag_bag_head_tail(session): | tail/| | tail/| / | tail | +------+ +------+ +------+ """ - dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) + dag_bag = DagBag(dag_folder=DEV_NULL) with DAG("head_tail", start_date=DEFAULT_DATE, schedule="@daily") as dag: head = ExternalTaskSensor( @@ -2209,7 +2213,7 @@ def dag_bag_head_tail_mapped_tasks(session): | tail/| | tail/| / | tail | +------+ +------+ +------+ """ - dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) + dag_bag = DagBag(dag_folder=DEV_NULL) with DAG("head_tail", start_date=DEFAULT_DATE, schedule="@daily") as dag: diff --git a/providers/standard/tests/unit/standard/sensors/test_time_delta.py b/providers/standard/tests/unit/standard/sensors/test_time_delta.py index b5c931e3dafbb..4a2e11a22f0a3 100644 --- a/providers/standard/tests/unit/standard/sensors/test_time_delta.py +++ b/providers/standard/tests/unit/standard/sensors/test_time_delta.py @@ -36,7 +36,12 @@ from airflow.utils.types import DagRunType from tests_common.test_utils import db -from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_2_PLUS, timezone +from tests_common.test_utils.version_compat import ( + AIRFLOW_V_3_0_PLUS, + AIRFLOW_V_3_2_PLUS, + AIRFLOW_V_3_3_PLUS, + timezone, +) if AIRFLOW_V_3_2_PLUS: from airflow.dag_processing.dagbag import DagBag @@ -63,7 +68,7 @@ def clear_db(): class TestTimedeltaSensor: def setup_method(self): - self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=False) + self.dagbag = DagBag(dag_folder=DEV_NULL) self.dag = DAG(TEST_DAG_ID, schedule=timedelta(days=1), start_date=DEFAULT_DATE) def test_timedelta_sensor(self, mocker): @@ -161,7 +166,10 @@ def 
test_timedelta_sensor_deferrable_run_after_vs_interval(run_after, interval_e class TestTimeDeltaSensorAsync: def setup_method(self): - self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True) + if AIRFLOW_V_3_3_PLUS: + self.dagbag = DagBag(dag_folder=DEV_NULL) + else: + self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True) self.args = {"owner": "airflow", "start_date": DEFAULT_DATE} self.dag = DAG(TEST_DAG_ID, schedule=timedelta(days=1), default_args=self.args) diff --git a/providers/standard/tests/unit/standard/sensors/test_weekday.py b/providers/standard/tests/unit/standard/sensors/test_weekday.py index 4f9bac530785d..69f2d82bf7bc8 100644 --- a/providers/standard/tests/unit/standard/sensors/test_weekday.py +++ b/providers/standard/tests/unit/standard/sensors/test_weekday.py @@ -27,7 +27,12 @@ from airflow.providers.standard.utils.weekday import WeekDay from tests_common.test_utils import db -from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_2_PLUS, timezone +from tests_common.test_utils.version_compat import ( + AIRFLOW_V_3_0_PLUS, + AIRFLOW_V_3_2_PLUS, + AIRFLOW_V_3_3_PLUS, + timezone, +) if AIRFLOW_V_3_2_PLUS: from airflow.dag_processing.dagbag import DagBag @@ -66,7 +71,10 @@ def clean_db(): def setup_method(self): self.clean_db() - self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True) + if AIRFLOW_V_3_3_PLUS: + self.dagbag = DagBag(dag_folder=DEV_NULL) + else: + self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True) self.args = {"owner": "airflow", "start_date": DEFAULT_DATE} dag = DAG(TEST_DAG_ID, schedule=timedelta(days=1), default_args=self.args) self.dag = dag From 49096263da32defa6286b5a7d13d533cf7fc5173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Mon, 4 May 2026 12:37:58 -0300 Subject: [PATCH 14/17] Fix mypy and test assertion after DagBag include_examples removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 
previous commit removed include_examples from DagBag callers but left two follow-ups that broke prek mypy and the config CLI test: - devel-common test_utils/db.py: pre-3.1 compat branches still call DagBag(include_examples=False) for older Airflow runtimes; mypy now flags it because the current source no longer accepts that kwarg. Add call-arg type-ignore (matches the existing attr-defined pattern on the sync_to_db calls below). - test_config_command.py: assertions for conf.write(...) lost the include_examples=False kwarg, but conf.write is AirflowConfigParser.write (config-file examples), unrelated to the DagBag flag, and still passes it. Restore the kwarg in the expected call. Signed-off-by: André Ahlert --- airflow-core/tests/unit/cli/commands/test_config_command.py | 2 ++ devel-common/src/tests_common/test_utils/db.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/airflow-core/tests/unit/cli/commands/test_config_command.py b/airflow-core/tests/unit/cli/commands/test_config_command.py index efe5ad5cf296e..008554422624b 100644 --- a/airflow-core/tests/unit/cli/commands/test_config_command.py +++ b/airflow-core/tests/unit/cli/commands/test_config_command.py @@ -45,6 +45,7 @@ def test_cli_show_config_should_write_data(self, mock_conf, mock_stringio): mock_conf.write.assert_called_once_with( mock_stringio.return_value.__enter__.return_value, section=None, + include_examples=False, include_descriptions=False, include_sources=False, include_env_vars=False, @@ -64,6 +65,7 @@ def test_cli_show_config_should_write_data_specific_section(self, mock_conf, moc mock_conf.write.assert_called_once_with( mock_stringio.return_value.__enter__.return_value, section="core", + include_examples=False, include_descriptions=False, include_sources=False, include_env_vars=False, diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index 65b3b7a890333..f38b4d75c1c9d 100644 --- 
a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -229,10 +229,10 @@ def parse_and_sync_to_db(folder: Path | str): sync_bag_to_db(bundle_dagbag, bundle.name, None, session=session) elif AIRFLOW_V_3_0_PLUS: - dagbag = DagBag(dag_folder=folder, include_examples=False) + dagbag = DagBag(dag_folder=folder, include_examples=False) # type: ignore[call-arg] dagbag.sync_to_db("dags-folder", None, session) # type: ignore[attr-defined] else: - dagbag = DagBag(dag_folder=folder, include_examples=False) + dagbag = DagBag(dag_folder=folder, include_examples=False) # type: ignore[call-arg] dagbag.sync_to_db(session=session) # type: ignore[attr-defined] return dagbag From 9ef9809f85d4965997d0f407046f9f4f8ef16659 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Mon, 4 May 2026 20:21:22 -0300 Subject: [PATCH 15/17] Drop include_examples from new DagBag callers and gate dag_maker for compat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The merge with main brought in tests in airflow-core/tests/unit/models/test_dag.py that still passed include_examples=False to DagBag, and the dag_maker fixture in pytest_plugin.py was loading examples by default on Airflow <3.3 because the removed kwarg flipped the effective default to True. That broke compat 3.0.6 runs because the cleanup path then tried to roll back a session that was never attached. Drop the kwarg from the new test_dag.py callers and gate both dag_maker and get_test_dag DagBag construction on AIRFLOW_V_3_3_PLUS so older Airflow versions still pass include_examples=False explicitly. 
Signed-off-by: André Ahlert --- airflow-core/tests/unit/models/test_dag.py | 8 ++++---- devel-common/src/tests_common/pytest_plugin.py | 17 ++++++++++++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/airflow-core/tests/unit/models/test_dag.py b/airflow-core/tests/unit/models/test_dag.py index 142d8540df148..e16534e894e12 100644 --- a/airflow-core/tests/unit/models/test_dag.py +++ b/airflow-core/tests/unit/models/test_dag.py @@ -235,7 +235,7 @@ def test_dag_test_syncs_sibling_for_trigger_dagrun(self, test_dags_bundle, sessi parent_id = "test_dag_test_trigger_parent" target_id = "test_dag_test_trigger_target" - dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER)) parent = dagbag.dags.get(parent_id) assert parent is not None @@ -268,7 +268,7 @@ def test_dag_test_syncs_sibling_for_dynamic_trigger_dagrun(self, test_dags_bundl parent_id = "test_dag_test_dynamic_trigger_parent" target_id = "test_dag_test_dynamic_trigger_target" - dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER)) parent = dagbag.dags.get(parent_id) assert parent is not None @@ -295,7 +295,7 @@ def test_dag_test_falls_back_when_recorded_bundle_no_longer_configured( parent_id = "test_dag_test_trigger_parent" target_id = "test_dag_test_trigger_target" - dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER)) parent = dagbag.dags.get(parent_id) assert parent is not None @@ -323,7 +323,7 @@ def test_dag_test_only_syncs_owning_bundle_when_parent_already_serialized( """ parent_id = "test_dag_test_trigger_parent" - dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER)) parent = dagbag.dags.get(parent_id) assert parent is not None diff --git 
a/devel-common/src/tests_common/pytest_plugin.py b/devel-common/src/tests_common/pytest_plugin.py index 89cceed23fc03..788afa9533319 100644 --- a/devel-common/src/tests_common/pytest_plugin.py +++ b/devel-common/src/tests_common/pytest_plugin.py @@ -910,6 +910,7 @@ def dag_maker(request) -> Generator[DagMaker, None, None]: AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_1_PLUS, AIRFLOW_V_3_2_PLUS, + AIRFLOW_V_3_3_PLUS, NOTSET, ) @@ -932,7 +933,10 @@ def __init__(self): from airflow.models import DagBag # Keep all the serialized dags we've created in this test - self.dagbag = DagBag(os.devnull) + if AIRFLOW_V_3_3_PLUS: + self.dagbag = DagBag(os.devnull) + else: + self.dagbag = DagBag(os.devnull, include_examples=False) # type: ignore[call-arg] def __enter__(self): self.serialized_model = None @@ -1732,7 +1736,11 @@ def _get(dag_id: str): from airflow import settings from airflow.models.serialized_dag import SerializedDagModel - from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_2_PLUS + from tests_common.test_utils.version_compat import ( + AIRFLOW_V_3_0_PLUS, + AIRFLOW_V_3_2_PLUS, + AIRFLOW_V_3_3_PLUS, + ) if AIRFLOW_V_3_2_PLUS: from airflow.dag_processing.dagbag import DagBag @@ -1740,7 +1748,10 @@ def _get(dag_id: str): from airflow.models.dagbag import DagBag # type: ignore[no-redef, attribute-defined] dag_file = AIRFLOW_CORE_TESTS_PATH / "unit" / "dags" / f"{dag_id}.py" - dagbag = DagBag(dag_folder=dag_file) + if AIRFLOW_V_3_3_PLUS: + dagbag = DagBag(dag_folder=dag_file) + else: + dagbag = DagBag(dag_folder=dag_file, include_examples=False) # type: ignore[call-arg] dag = dagbag.get_dag(dag_id) From f7c43d69dda7bd2118e0c97b57d66ecc6910ca67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Tue, 5 May 2026 05:51:19 -0300 Subject: [PATCH 16/17] Gate DagBag include_examples on Airflow 3.3+ for older compat runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PR drops 
include_examples=False from a number of DagBag callers because DagBag in 3.3+ no longer accepts the kwarg and never loads examples by default. On compat runs against Airflow 3.1.x and 3.2.x, parse_and_sync_to_db hit a branch that called DagBag(folder) with the old default include_examples=True, which let the dags-folder DagBag pull in core example DAGs that the bundle loop also synced, producing duplicate dag_schedule_asset_reference rows and a UNIQUE constraint failure. On Airflow 2.11.1, DagBag callers in the standard external_task_sensor and time_delta tests defaulted to include_examples=True and pulled in example DAGs whose required Params lack defaults, breaking ExternalTaskMarker tests with ParamValidationError. Restrict the new bundle-aware sync path to AIRFLOW_V_3_3_PLUS and pass include_examples=False explicitly on older Airflow versions in the standard provider tests so compat 2.11.1, 3.1.8, and 3.2.1 keep their original behaviour. Signed-off-by: André Ahlert --- .../src/tests_common/test_utils/db.py | 22 +++++++++++----- .../sensors/test_external_task_sensor.py | 26 ++++++++++++++----- .../unit/standard/sensors/test_time_delta.py | 5 +++- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index f38b4d75c1c9d..b4d2bc1740cdb 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -64,7 +64,12 @@ ParseImportError, TaskOutletAssetReference, ) -from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_1_PLUS, AIRFLOW_V_3_2_PLUS +from tests_common.test_utils.version_compat import ( + AIRFLOW_V_3_0_PLUS, + AIRFLOW_V_3_1_PLUS, + AIRFLOW_V_3_2_PLUS, + AIRFLOW_V_3_3_PLUS, +) log = logging.getLogger(__name__) @@ -209,11 +214,9 @@ def parse_and_sync_to_db(folder: Path | str): DagBundlesManager().sync_bundles_to_db(session=session) session.flush() - if AIRFLOW_V_3_1_PLUS: - 
try: - from airflow.dag_processing.dagbag import sync_bag_to_db - except ImportError: - from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] + if AIRFLOW_V_3_3_PLUS: + from airflow.dag_processing.dagbag import sync_bag_to_db + # On 3.3+, example DAGs are exposed as their own bundles # (``example_dags`` for core, ``apache-airflow-providers-*-example-dags`` # for each provider that ships an ``example_dags`` folder). The @@ -228,6 +231,13 @@ def parse_and_sync_to_db(folder: Path | str): bundle_dagbag = DagBag(dag_folder=bundle.path) sync_bag_to_db(bundle_dagbag, bundle.name, None, session=session) + elif AIRFLOW_V_3_1_PLUS: + try: + from airflow.dag_processing.dagbag import sync_bag_to_db + except ImportError: + from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] + dagbag = DagBag(dag_folder=folder, include_examples=False) # type: ignore[call-arg] + sync_bag_to_db(dagbag, "dags-folder", None, session=session) elif AIRFLOW_V_3_0_PLUS: dagbag = DagBag(dag_folder=folder, include_examples=False) # type: ignore[call-arg] dagbag.sync_to_db("dags-folder", None, session) # type: ignore[attr-defined] diff --git a/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py b/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py index afaed86fc6753..fd5d7ea0fee11 100644 --- a/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py +++ b/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py @@ -66,8 +66,22 @@ AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_1_PLUS, AIRFLOW_V_3_2_PLUS, + AIRFLOW_V_3_3_PLUS, ) + +def _make_dagbag(dag_folder): + """DagBag with examples disabled on Airflow <3.3. + + In 3.3+, ``include_examples`` was removed and example DAGs come from + provider example bundles instead. On older versions the default is True, + which loads example DAGs that can fail tests with their required Params. 
+ """ + if AIRFLOW_V_3_3_PLUS: + return DagBag(dag_folder=dag_folder) + return DagBag(dag_folder=dag_folder, include_examples=False) # type: ignore[call-arg] + + if AIRFLOW_V_3_0_PLUS: from airflow.models.dag_version import DagVersion from airflow.sdk import BaseOperator, task as task_deco @@ -1725,7 +1739,7 @@ def dag_bag_ext(): """ clear_db_runs() - dag_bag = DagBag(dag_folder=DEV_NULL) + dag_bag = _make_dagbag(DEV_NULL) dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule=None) task_a_0 = EmptyOperator(task_id="task_a_0", dag=dag_0) @@ -1789,7 +1803,7 @@ def dag_bag_parent_child(): """ clear_db_runs() - dag_bag = DagBag(dag_folder=DEV_NULL) + dag_bag = _make_dagbag(DEV_NULL) day_1 = DEFAULT_DATE @@ -2024,7 +2038,7 @@ def dag_bag_cyclic(): """ def _factory(depth: int) -> DagBag: - dag_bag = DagBag(dag_folder=DEV_NULL) + dag_bag = _make_dagbag(DEV_NULL) dags = [] @@ -2122,7 +2136,7 @@ def dag_bag_multiple(session): """ Create a DagBag containing two DAGs, linked by multiple ExternalTaskMarker. 
""" - dag_bag = DagBag(dag_folder=DEV_NULL) + dag_bag = _make_dagbag(DEV_NULL) daily_dag = DAG("daily_dag", start_date=DEFAULT_DATE, schedule="@daily") agg_dag = DAG("agg_dag", start_date=DEFAULT_DATE, schedule="@daily") if AIRFLOW_V_3_0_PLUS: @@ -2168,7 +2182,7 @@ def dag_bag_head_tail(session): | tail/| | tail/| / | tail | +------+ +------+ +------+ """ - dag_bag = DagBag(dag_folder=DEV_NULL) + dag_bag = _make_dagbag(DEV_NULL) with DAG("head_tail", start_date=DEFAULT_DATE, schedule="@daily") as dag: head = ExternalTaskSensor( @@ -2213,7 +2227,7 @@ def dag_bag_head_tail_mapped_tasks(session): | tail/| | tail/| / | tail | +------+ +------+ +------+ """ - dag_bag = DagBag(dag_folder=DEV_NULL) + dag_bag = _make_dagbag(DEV_NULL) with DAG("head_tail", start_date=DEFAULT_DATE, schedule="@daily") as dag: diff --git a/providers/standard/tests/unit/standard/sensors/test_time_delta.py b/providers/standard/tests/unit/standard/sensors/test_time_delta.py index 4a2e11a22f0a3..5c4eeef2a8eb9 100644 --- a/providers/standard/tests/unit/standard/sensors/test_time_delta.py +++ b/providers/standard/tests/unit/standard/sensors/test_time_delta.py @@ -68,7 +68,10 @@ def clear_db(): class TestTimedeltaSensor: def setup_method(self): - self.dagbag = DagBag(dag_folder=DEV_NULL) + if AIRFLOW_V_3_3_PLUS: + self.dagbag = DagBag(dag_folder=DEV_NULL) + else: + self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=False) # type: ignore[call-arg] self.dag = DAG(TEST_DAG_ID, schedule=timedelta(days=1), start_date=DEFAULT_DATE) def test_timedelta_sensor(self, mocker): From 30607c982a5236f41d910845847a846c78cfe4cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Tue, 5 May 2026 17:50:25 -0300 Subject: [PATCH 17/17] Trim 66161 newsfragment to user-visible facts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DagBag is not part of the Airflow public interface, so removing include_examples from 
DagBag/collect_dags/BundleDagBag/parse_and_sync_to_db does not need a release-notes migration recipe. Drop the internal removal block and the Types of change / Migration rules scaffolding, keep the bundle naming change and the bundle_name REST API behaviour change. Signed-off-by: André Ahlert --- .../newsfragments/66161.significant.rst | 71 +++---------------- 1 file changed, 9 insertions(+), 62 deletions(-) diff --git a/airflow-core/newsfragments/66161.significant.rst b/airflow-core/newsfragments/66161.significant.rst index aa4db6ba9f9f7..79b7313bb291d 100644 --- a/airflow-core/newsfragments/66161.significant.rst +++ b/airflow-core/newsfragments/66161.significant.rst @@ -1,65 +1,12 @@ Provider example DAGs are exposed as dedicated bundles -Example DAGs that ship with provider distributions are now discovered via +Example DAGs shipped by provider distributions are now discovered via ``ProvidersManager`` and registered as their own DAG bundles, one per -provider that ships an ``example_dags/`` folder. Bundle names follow the -shape ``apache-airflow-providers--example-dags`` (for -canonical Apache providers) or ``-example-dags`` (for -third-party providers). The ``[core] load_examples`` configuration option -remains the single switch that controls whether any example bundles are -registered. - -**What changed:** - -- Example DAGs that previously came in under the implicit ``dags-folder`` - bundle are now persisted in ``DagBundleModel`` rows and emitted in REST - API responses (``GET /api/v2/dags/{dag_id}/dag-versions`` and the - ``bundle_name`` field on task-instance responses) under the new - per-provider bundle names. -- Nested providers such as ``apache-airflow-providers-common-sql`` are - discovered correctly (previously they were missed because discovery - walked ``airflow.providers.__path__`` directly). - -**Behaviour changes:** - -- Clients filtering or tracking bundles by ``"dags-folder"`` for - previously-shipped example DAGs (e.g. 
``example_python_operator``) need - to update to the new per-provider bundle names. The DAG identifiers - themselves are unchanged. - -**Removals:** - -- The ``include_examples`` parameter has been removed from - ``DagBag.__init__``, ``DagBag.collect_dags``, ``BundleDagBag.__init__``, - and ``tests_common.test_utils.db.parse_and_sync_to_db``. Example DAG - loading is now controlled exclusively by the ``[core] load_examples`` - configuration option, which gates whether the per-provider example - bundles are registered. Callers that previously passed - ``include_examples=True`` should set - ``conf_vars({("core", "load_examples"): "true"})`` (or equivalent - configuration). Callers that previously passed - ``include_examples=False`` can drop the argument: it matches the new - default behaviour where ``DagBag`` only walks the configured - ``dag_folder`` and example DAGs come in via dedicated bundles. - -* Types of change - - * [ ] Dag changes - * [x] Config changes - * [x] API changes - * [ ] CLI changes - * [x] Behaviour changes - * [ ] Plugin changes - * [ ] Dependency changes - * [x] Code interface changes - -* Migration rules needed - - * Update clients that filter REST API responses by ``bundle_name`` to - match the new per-provider bundle names for example DAGs. - * Replace ``include_examples=True`` calls to ``DagBag`` / - ``parse_and_sync_to_db`` with - ``conf_vars({("core", "load_examples"): "true"})`` (or equivalent - configuration). The argument has been removed. - * Drop ``include_examples=False`` arguments from ``DagBag`` / - ``parse_and_sync_to_db`` calls; the default behaviour is unchanged. +provider, named ``apache-airflow-providers-<name>-example-dags`` +(or ``<distribution-name>-example-dags`` for third-party providers). The +``[core] load_examples`` option still gates whether they are registered. + +REST API clients that filtered ``bundle_name`` by ``"dags-folder"`` for +provider-shipped example DAGs (e.g.
``example_python_operator``) must +update to the new per-provider bundle names. DAG identifiers are +unchanged.