diff --git a/flow.cylc b/flow.cylc index 0fda894d..733ca597 100644 --- a/flow.cylc +++ b/flow.cylc @@ -22,6 +22,12 @@ {% set STALL_TIMEOUT = "P1W" %} {% endif %} +{# Set the default retry delays unless they are already set by the test pipeline #} +{# Retry every task twice: first after a one-hour delay, then after a two-hour delay #} +{% if DEFAULT_RETRIES is not defined %} + {% set DEFAULT_RETRIES = "PT1H, PT2H" %} +{% endif %} + {# Set ANALYSIS_START and ANALYSIS_STOP if they do not exist #} {% if ANALYSIS_START is not defined %} {% set ANALYSIS_START = PP_START %} @@ -109,8 +115,8 @@ # Runahead limit specifies the number of cycle points that will spawn ahead of the oldest incomplete task. # We don't need it because we specify the final cycle point, and it decreases robustness # as it prevents future tasks from running due to a previous failed task. - # As runahead limit is required, we will set it to 99999. - runahead limit = P99999 + # As runahead limit is required, we will set it to 999. + runahead limit = P999 [[queues]] # Limit the entire workflow to 100 active tasks. Only allow a single cleaning # or data-catalog task to run at once, as they can fail if run in parallel. @@ -289,11 +295,15 @@ [runtime] [[root]] + execution retry delays = {{ DEFAULT_RETRIES }} [[[environment]]] fre_yaml = $CYLC_WORKFLOW_RUN_DIR/{{ YAML }} [[pp-starter]] inherit = PP-STARTER + # Note: pp-starter is a fake task and should be reimplemented as a proper trigger. + # No task retries for this fake task. + execution retry delays = # NOTE! script must appear *before* [[[enviroment]]] or else # the job scripts will have quoting issues script = """ diff --git a/for_gh_runner/runscript.sh b/for_gh_runner/runscript.sh index 744fd6a7..e6decbd3 100755 --- a/for_gh_runner/runscript.sh +++ b/for_gh_runner/runscript.sh @@ -118,7 +118,8 @@ fre_pp_steps () { ## RUN echo -e "\nRunning the workflow with cylc play ..." 
- cylc play --no-detach --debug -s 'STALL_TIMEOUT="PT0S"' ${name} + # Override two Jinja2 variables: STALL_TIMEOUT="PT0S" sets the stall timeout to zero and DEFAULT_RETRIES="" disables task retries + cylc play --no-detach --debug -s 'STALL_TIMEOUT="PT0S"' -s 'DEFAULT_RETRIES=""' ${name} #check_exit_status "PLAY" # if cylc play fails and this is not commented, log uploading does not work ## SUMMARY