diff --git a/engine/performance.py b/engine/performance.py index 6edf2bce..19192363 100644 --- a/engine/performance.py +++ b/engine/performance.py @@ -23,28 +23,28 @@ @click.command() @click.option( - "--timing-regex", - help=cli_help["timing_regex"], + "--log_file", + help=cli_help["log_file"], ) @click.option( "--timing-database", help=cli_help["timing_database"], ) @click.option("--append-time", help=cli_help["append_time"], type=bool, default=False) -def performance(timing_regex, timing_database, append_time): - timing_file_name_base, timing_regex = os.path.split(timing_regex) +def performance(log_file, timing_database, append_time): + timing_file_name_base, log_file = os.path.split(log_file) if timing_file_name_base == "": timing_file_name_base = "." - timing_file_name, err = file_names_from_pattern(timing_file_name_base, timing_regex) + timing_file_name, err = file_names_from_pattern(timing_file_name_base, log_file) if err > 0: - logger.info("Did not find any files for regex %s", timing_regex) + logger.info("Did not find any files for regex %s", log_file) sys.exit(1) if len(timing_file_name) > 1: logger.critical( "Found too many files for regex '%s' in '%s':", - timing_regex, + log_file, timing_file_name_base, ) for tf in timing_file_name: @@ -52,7 +52,7 @@ def performance(timing_regex, timing_database, append_time): sys.exit(1) else: timing_file_name = timing_file_name_base + "/" + timing_file_name[0] - logger.info("Found timing file %s for regex %s", timing_file_name, timing_regex) + logger.info("Found timing file %s for regex %s", timing_file_name, log_file) tt = TimingTree.from_logfile(timing_file_name, read_logfile) diff --git a/poetry.lock b/poetry.lock index d8583350..e15e414a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2661,6 +2661,17 @@ click = ">=8.2.1" rich = ">=12.3.0" shellingham = ">=1.3.0" +[[package]] +name = "types-python-dateutil" +version = "2.9.0.20260124" +description = "Typing stubs for python-dateutil" +optional = false +python-versions = ">=3.9" +files = [ + {file = "types_python_dateutil-2.9.0.20260124-py3-none-any.whl", hash = "sha256:f802977ae08bf2260142e7ca1ab9d4403772a254409f7bbdf652229997124951"}, + {file = "types_python_dateutil-2.9.0.20260124.tar.gz", hash = "sha256:7d2db9f860820c30e5b8152bfe78dbdf795f7d1c6176057424e8b3fdd1f581af"}, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -2754,4 +2765,4 @@ viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "5dbd99db2f4a14cf026d2b1e243748cb39e51fc0828756e8def9cc6782767aea" +content-hash = "6c318da3280b260c76b1ed7f0eb81a65be464cf34bbec2de00fd5a58b76d51a6" diff --git a/pyproject.toml b/pyproject.toml index a90ab8e6..624f92f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ pathlib = ">=1.0.1" pandas = ">=2.2.3" regex = ">=2024.11.6" xarray = ">=2024.11.0" +types-python-dateutil = "^2.9.0" [tool.mypy] allow_untyped_defs = true diff --git a/requirements.txt b/requirements.txt index 5280e3d9..9b29f92c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -772,6 +772,9 @@ regex==2026.2.28 ; python_version >= "3.10" and python_version < "4.0" \ six==1.17.0 ; python_version >= "3.10" and python_version < "4.0" \ --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 +types-python-dateutil==2.9.0.20260124 ; python_version >= "3.10" and python_version < "4.0" \ + --hash=sha256:7d2db9f860820c30e5b8152bfe78dbdf795f7d1c6176057424e8b3fdd1f581af \ + --hash=sha256:f802977ae08bf2260142e7ca1ab9d4403772a254409f7bbdf652229997124951 tzdata==2025.3 ; python_version >= "3.10" and python_version < "4.0" \ --hash=sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1 \ --hash=sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7 diff --git a/templates/ICON.jinja b/templates/ICON.jinja index 3100be9a..3f0d106a 100644 --- a/templates/ICON.jinja +++ b/templates/ICON.jinja @@ -59,7 +59,7 @@ }, "performance": { - "timing_regex": "{{codebase_install}}/run/LOG.exp.{{experiment_name}}.*", + "log_file": "{{codebase_install}}/run/LOG.exp.{{experiment_name}}.*", "timing_names": ["integrate_nh", "nh_solve", "physics"], "timing_database": "{{codebase_install}}/database", "append_time": "{{append_time}}" diff --git a/templates/testdata.jinja b/templates/testdata.jinja index 64a7a78f..38532c33 100644 --- a/templates/testdata.jinja +++ b/templates/testdata.jinja @@ -37,7 +37,7 @@ "current_files": "{{reference}}/reference/{{experiment_name}}_{{member_id[1] if member_id|length>1 else 2}}.csv" }, "performance": { - "timing_regex": "{{codebase_install}}/{{experiment_name}}/LOG.exp.{{experiment_name}}.run.12345678.o", + "log_file": "{{codebase_install}}/{{experiment_name}}/LOG.exp.{{experiment_name}}.run.12345678.o", "timing_names": ["integrate_nh", "nh_solve", "physics"], "timing_database": "{{reference}}/performance/{{experiment_name}}" }, diff --git a/tests/data/reference_meta.json b/tests/data/reference_meta.json index 3935f496..538ae99e 100644 --- a/tests/data/reference_meta.json +++ b/tests/data/reference_meta.json @@ -8,4 +8,4 @@ 37, 48 ] -} \ No newline at end of file +} diff --git a/tests/data/timing_example_3.txt b/tests/data/timing_example_3.txt index c418b2c5..f1172fbf 100644 --- a/tests/data/timing_example_3.txt +++ b/tests/data/timing_example_3.txt @@ -401,7 +401,7 @@ no_of_models=1 libpcre.so.1 => /usr/lib64/libpcre.so.1 (0x0000145b27b03000) + rm -f finish.status + date -Wed 28 Jun 2023 04:42:35 PM CEST +Fri 30 Jan 16:13:34 CET 2026 + set -x + srun -n 4 --ntasks-per-node 4 '--threads-per-core=1' '--distribution=cyclic' /scratch/e1000/meteoswiss/scratch/huppd/add_performance_test/mch_gpu_mixed_v0.18.1.6/run/run_wrapper/balfrin_gpu.sh /scratch/e1000/meteoswiss/scratch/huppd/add_performance_test/mch_gpu_mixed_v0.18.1.6/bin/icon @@ -4411,7 +4411,7 @@ input results for domain 1: mo_atmo_model:destruct_atmo_model: destruct_patches is done mo_atmo_model:destruct_atmo_model: clean-up finished + set +x -Wed 28 Jun 2023 04:42:57 PM CEST +Fri 30 Jan 16:16:34 CET 2026 OK ============================ Script run successfully: OK diff --git a/tests/helpers.py b/tests/helpers.py index 79917019..35f97a5e 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -81,10 +81,10 @@ def assert_empty_df(df, msg): assert len(df.values) == 0, f"{msg}:\n{df}" -def run_performance_cli(timing_regex, timing_database): +def run_performance_cli(log_file, timing_database): args = [ - "--timing-regex", - timing_regex, + "--log_file", + log_file, "--timing-database", timing_database, ] diff --git a/util/click_util.py b/util/click_util.py index 1ddadcd6..46943242 100644 --- a/util/click_util.py +++ b/util/click_util.py @@ -147,9 +147,11 @@ def convert(self, value, param, ctx): "current_files": r"List of current files to be tested, " + r"both for stats and fof files", "factor": r"Relaxation factor for the tolerance values.", - "timing_regex": r"Regex for the file that contains the latest log.", + "log_file": r"Regex for the file that contains the latest log.", "timing_names": r"The name of the timing entries to be displayed.", - "timing_database": r"A persistent file to keep track of performance history.", + "timing_database": r"Path to the timing database files used for performance " + + r"reference or performance checks. This consists of three files (_meta.json, " + + r"_0_tree.json, _0_meta.json) that store timing information.", "append_time": r"If true: append to the performance data; If false: overwrite the " + r"performance data (default: false).", "run_dir": r"Directory from where the run is started " diff --git a/util/dataframe_ops.py b/util/dataframe_ops.py index 7720ab85..a8597bec 100644 --- a/util/dataframe_ops.py +++ b/util/dataframe_ops.py @@ -49,8 +49,11 @@ def compute_rel_diff_dataframe(df1, df2): def compute_division(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame: # avoid division by 0 and put nan instead - out = df1 / df2.replace({0: np.nan}) - # put 0 if numerator is 0 as well + if isinstance(df2, (int, float)): + df2 = np.nan if df2 == 0 else df2 + out = df1 / df2 + else: + out = df1 / df2.replace(0, np.nan) out[df1 == 0] = 0 return out diff --git a/util/icon/extract_timings.py b/util/icon/extract_timings.py index 4f923406..38ca251d 100644 --- a/util/icon/extract_timings.py +++ b/util/icon/extract_timings.py @@ -5,16 +5,17 @@ import re import sys -from datetime import datetime import numpy as np +from dateutil.parser import ParserError +from dateutil.parser import parse as parse_date from util.constants import DATETIME_FORMAT from util.log_handler import logger -TIMING_START_REGEX = r"(?: +L? ?[a-zA-Z_.]+)" +TIMING_START_REGEX = r"\s+L?\s*[a-zA-Z_.]+" TIMING_ELEMENT_REGEX = r"(?:\[?\d+[.msh]?\d*s?\]? +)" -TIMING_REGEX = TIMING_START_REGEX + " +" + TIMING_ELEMENT_REGEX + "{6,20} *(?!.)" +TIMING_REGEX = TIMING_START_REGEX + r"\s+(?:" + TIMING_ELEMENT_REGEX + r"){6,20} *(?!.)" HEADER_REGEX = r"name +.*calls.*" INDENT_REGEX = r"^ *L? " HOUR_REGEX = r"(\d+)h(\d+)m(\d+)s" @@ -22,35 +23,9 @@ SEC_REGEX = r"(\d+[.]?\d*)s" NUMBER_REGEX = r"(\d+[.]?\d*)" -dateline_regexs = ( - r"(?:[A-Z][a-z]{2} +){2}\d{1,2} \d{2}:\d{2}:\d{2} [A-Z]{3,4} 20\d{2}", - ( - r"(?:[A-Z][a-z]{2} +)\d{1,2} (?:[A-Z][a-z]{2} +)20\d{2} \d{2}:\d{2}:\d{2} " - "[A-Z]{2} [A-Z]{3,4}" - ), -) -icon_date_formats = ("%a %b %d %H:%M:%S %Z %Y", "%a %d %b %Y %H:%M:%S %p %Z") - DICT_REGEX = r"^\s*{} *: *(.*)" -def _convert_dateline_to_start_end_datetime(dateline, icon_date_format): - # LOG.check files have more dates than we need - # The dates we are interested in are always at the same position relative to the - # other dates - if len(dateline) > 2: - dateline = [dateline[1], dateline[2]] - start_time, finish_time = dateline - - finish_datetime = datetime.strptime(finish_time, icon_date_format) - finish_datetime_converted = finish_datetime.strftime(DATETIME_FORMAT) - - start_datetime = datetime.strptime(start_time, icon_date_format) - start_datetime_converted = start_datetime.strftime(DATETIME_FORMAT) - - return (start_datetime_converted, finish_datetime_converted) - - def read_logfile(filename): with open(filename, "r", encoding="latin-1") as f: # read file into list of lines, remove empty lines @@ -121,22 +96,26 @@ def read_logfile(filename): meta_data = {} # get start and finish time from job - found_dateline_yes = False - start_datetime_converted = "" - finish_datetime_converted = "" - for dateline_regex, icon_date_format in zip(dateline_regexs, icon_date_formats): - dateline = re.findall(dateline_regex, full_file) - - if dateline: - ( - start_datetime_converted, - finish_datetime_converted, - ) = _convert_dateline_to_start_end_datetime(dateline, icon_date_format) - found_dateline_yes = True - if not found_dateline_yes: - raise Exception("Could not match any regex for start and end time.") - meta_data["start_time"] = start_datetime_converted - meta_data["finish_time"] = finish_datetime_converted + # --- robust start/finish datetime extraction --- + + datelines = [] + + for line in full_file.splitlines(): + if line.count(":") >= 2: + try: + dt = parse_date(line, fuzzy=False) + datelines.append(dt) + except (ParserError, ValueError): + continue + + if len(datelines) < 2: + raise Exception("Could not robustly determine start and finish time.") + + start_dt = datelines[0] + finish_dt = datelines[-1] + + meta_data["start_time"] = start_dt.strftime(DATETIME_FORMAT) + meta_data["finish_time"] = finish_dt.strftime(DATETIME_FORMAT) # get meta data from ICON log (in the form "Key : Value") revision = re.search(