From 75eef1121cf8cd2d6de5f32f69585dbd5c514dee Mon Sep 17 00:00:00 2001 From: wjddn279 Date: Mon, 23 Mar 2026 19:56:02 +0900 Subject: [PATCH 1/3] add docs for dag version inflation --- airflow-core/docs/faq.rst | 160 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/airflow-core/docs/faq.rst b/airflow-core/docs/faq.rst index f57f2ddebf22c..e1d2e4c1ce74e 100644 --- a/airflow-core/docs/faq.rst +++ b/airflow-core/docs/faq.rst @@ -237,6 +237,166 @@ There are several reasons why Dags might disappear from the UI. Common causes in * **Time synchronization issues** - Ensure all nodes (database, schedulers, workers) use NTP with <1s clock drift. +.. _faq:dag-version-inflation: + +Why does my Dag version keep increasing? +----------------------------------------- + +Every time the Dag processor parses a Dag file, it serializes the Dag and compares the result with the +version stored in the metadata database. If anything has changed, Airflow creates a new Dag version. This +mechanism ensures that Dag runs use consistent code throughout their execution, even if the Dag file is +updated mid-run. + +**Dag version inflation** occurs when the version number increases indefinitely without the Dag author +making any intentional changes. + +What goes wrong +""""""""""""""" + +When Dag versions increase without meaningful changes: + +* The metadata database accumulates unnecessary Dag version records, increasing storage and query overhead. +* The UI shows a misleading history of Dag changes, making it harder to identify real modifications. +* The scheduler and API server may consume more memory as they load and cache a growing number of Dag versions. + +Common causes +""""""""""""" + +Version inflation is caused by using values that change at **parse time** — that is, every time the Dag +processor evaluates the Dag file — as arguments to Dag or Task constructors. The most common patterns are: + +**1. 
Using ``datetime.now()`` or ``pendulum.now()`` as ``start_date``:** + +.. code-block:: python + + from datetime import datetime + + from airflow.sdk import DAG + + # BAD: datetime.now() produces a different value on every parse + with DAG( + dag_id="bad_example", + start_date=datetime.now(), + schedule="@daily", + ): + ... + +Every parse produces a different ``start_date``, so the serialized Dag is always different from the +stored version. + +**2. Using random values in Dag or Task arguments:** + +.. code-block:: python + + import random + + from airflow.sdk import DAG + from airflow.providers.standard.operators.python import PythonOperator + + with DAG(dag_id="bad_random", start_date="2024-01-01", schedule="@daily") as dag: + # BAD: random value changes every parse + PythonOperator( + task_id=f"task_{random.randint(1, 1000)}", + python_callable=lambda: None, + ) + +**3. Assigning runtime-varying values to variables used in constructors:** + +.. code-block:: python + + from datetime import datetime + + from airflow.sdk import DAG + from airflow.providers.standard.operators.python import PythonOperator + + # BAD: the variable captures a parse-time value, then is passed to the DAG + default_args = {"start_date": datetime.now()} + + with DAG(dag_id="bad_defaults", default_args=default_args, schedule="@daily") as dag: + PythonOperator(task_id="my_task", python_callable=lambda: None) + +Even though ``datetime.now()`` is not called directly inside the Dag constructor, it flows in through +``default_args`` and still causes a different serialized Dag on every parse. + +**4. Using environment variables or file contents that change between parses:** + +.. 
code-block:: python + + import os + + from airflow.sdk import DAG + from airflow.providers.standard.operators.bash import BashOperator + + with DAG(dag_id="bad_env", start_date="2024-01-01", schedule="@daily") as dag: + # BAD if BUILD_NUMBER changes on every deployment or parse + BashOperator( + task_id="echo_build", + bash_command=f"echo {os.environ.get('BUILD_NUMBER', 'unknown')}", + ) + +How to avoid version inflation +"""""""""""""""""""""""""""""" + +* **Use fixed ``start_date`` values.** Always set ``start_date`` to a static ``datetime`` literal: + + .. code-block:: python + + import datetime + + from airflow.sdk import DAG + + with DAG( + dag_id="good_example", + start_date=datetime.datetime(2024, 1, 1), + schedule="@daily", + ): + ... + +* **Keep all Dag and Task constructor arguments deterministic.** Arguments passed to Dag and Operator + constructors must produce the same value on every parse. Move any dynamic computation into the + ``execute()`` method or use Jinja templates, which are evaluated at task execution time rather than + parse time. + +* **Use Jinja templates for dynamic values:** + + .. code-block:: python + + from airflow.providers.standard.operators.bash import BashOperator + + # GOOD: the template is resolved at execution time, not parse time + BashOperator( + task_id="echo_date", + bash_command="echo {{ ds }}", + ) + +* **Use Airflow Variables with templates instead of top-level lookups:** + + .. code-block:: python + + from airflow.providers.standard.operators.bash import BashOperator + + # GOOD: Variable is resolved at execution time via template + BashOperator( + task_id="echo_var", + bash_command="echo {{ var.value.my_variable }}", + ) + +Dag version inflation detection +"""""""""""""""""""""""""""""""" + +Starting from Airflow 3.2, the Dag processor performs **AST-based static analysis** on every Dag file +before parsing to detect runtime-varying values in Dag and Task constructors. 
When a potential issue is +found, it is surfaced as a **Dag warning** visible in the UI. + +You can control this behavior with the +``dag_version_inflation_check_level`` +configuration option: + +* ``off`` — Disables the check entirely. No errors or warnings are generated. +* ``warning`` (default) — Dags load normally but warnings are displayed in the UI when issues are detected. +* ``error`` — Treats detected issues as Dag import errors, preventing the Dag from loading. + + Dag construction ^^^^^^^^^^^^^^^^ From 0501635185347e75468d769744cd7bdc27008055 Mon Sep 17 00:00:00 2001 From: wjddn279 Date: Wed, 25 Mar 2026 12:02:06 +0900 Subject: [PATCH 2/3] fix docs --- airflow-core/docs/best-practices.rst | 2 ++ airflow-core/docs/faq.rst | 15 +++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/airflow-core/docs/best-practices.rst b/airflow-core/docs/best-practices.rst index d58d5c48fe00d..cd0f102d7efaa 100644 --- a/airflow-core/docs/best-practices.rst +++ b/airflow-core/docs/best-practices.rst @@ -296,6 +296,8 @@ When you execute that code you will see: This means that the ``get_array`` is not executed as top-level code, but ``get_task_id`` is. +.. _best_practices/code_quality_and_linting: + Code Quality and Linting ------------------------ diff --git a/airflow-core/docs/faq.rst b/airflow-core/docs/faq.rst index e1d2e4c1ce74e..6fe515cac688d 100644 --- a/airflow-core/docs/faq.rst +++ b/airflow-core/docs/faq.rst @@ -243,9 +243,7 @@ Why does my Dag version keep increasing? ----------------------------------------- Every time the Dag processor parses a Dag file, it serializes the Dag and compares the result with the -version stored in the metadata database. If anything has changed, Airflow creates a new Dag version. This -mechanism ensures that Dag runs use consistent code throughout their execution, even if the Dag file is -updated mid-run. +version stored in the metadata database. If anything has changed, Airflow creates a new Dag version. 
**Dag version inflation** occurs when the version number increases indefinitely without the Dag author making any intentional changes. @@ -273,9 +271,9 @@ processor evaluates the Dag file — as arguments to Dag or Task constructors. T from airflow.sdk import DAG - # BAD: datetime.now() produces a different value on every parse with DAG( dag_id="bad_example", + # BAD: datetime.now() produces a different value on every parse start_date=datetime.now(), schedule="@daily", ): @@ -294,8 +292,8 @@ stored version. from airflow.providers.standard.operators.python import PythonOperator with DAG(dag_id="bad_random", start_date="2024-01-01", schedule="@daily") as dag: - # BAD: random value changes every parse PythonOperator( + # BAD: random value changes every parse task_id=f"task_{random.randint(1, 1000)}", python_callable=lambda: None, ) @@ -328,9 +326,9 @@ Even though ``datetime.now()`` is not called directly inside the Dag constructor from airflow.providers.standard.operators.bash import BashOperator with DAG(dag_id="bad_env", start_date="2024-01-01", schedule="@daily") as dag: - # BAD if BUILD_NUMBER changes on every deployment or parse BashOperator( task_id="echo_build", + # BAD if BUILD_NUMBER changes on every deployment or parse bash_command=f"echo {os.environ.get('BUILD_NUMBER', 'unknown')}", ) @@ -396,6 +394,11 @@ configuration option: * ``warning`` (default) — Dags load normally but warnings are displayed in the UI when issues are detected. * ``error`` — Treats detected issues as Dag import errors, preventing the Dag from loading. +Additionally, you can catch these issues earlier in your development workflow by using the +``AIR302`` `ruff rule <https://docs.astral.sh/ruff/rules/>`_, which detects +dynamic values in Dag and Task constructors as part of static linting. See +:ref:`best_practices/code_quality_and_linting` for how to set up ruff with Airflow-specific rules. 
+ Dag construction ^^^^^^^^^^^^^^^^ From 3e94687368900fb34353bae6756d8fe3a6296bec Mon Sep 17 00:00:00 2001 From: wjddn279 Date: Wed, 25 Mar 2026 12:04:52 +0900 Subject: [PATCH 3/3] fix docs --- airflow-core/docs/faq.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow-core/docs/faq.rst b/airflow-core/docs/faq.rst index 6fe515cac688d..fe48c3695dc81 100644 --- a/airflow-core/docs/faq.rst +++ b/airflow-core/docs/faq.rst @@ -361,9 +361,9 @@ How to avoid version inflation from airflow.providers.standard.operators.bash import BashOperator - # GOOD: the template is resolved at execution time, not parse time BashOperator( task_id="echo_date", + # GOOD: the template is resolved at execution time, not parse time bash_command="echo {{ ds }}", ) @@ -373,9 +373,9 @@ How to avoid version inflation from airflow.providers.standard.operators.bash import BashOperator - # GOOD: Variable is resolved at execution time via template BashOperator( task_id="echo_var", + # GOOD: Variable is resolved at execution time via template bash_command="echo {{ var.value.my_variable }}", )