Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
b3798e0
report and observations in check
cghielmini Dec 1, 2025
b255f0e
improved version of extended check
cghielmini Dec 1, 2025
aac573d
first version of including veri_data in fof-compare
cghielmini Dec 3, 2025
a08b99b
adapt probtest to ekf
cghielmini Dec 12, 2025
357534d
first complete version of this PR
cghielmini Dec 12, 2025
d053bcf
rebasing
cghielmini Jan 8, 2026
43344d3
solve tests failing
cghielmini Jan 8, 2026
e4faee0
integration first part of comments
cghielmini Jan 12, 2026
17f49ac
improve write_differences function
cghielmini Jan 12, 2026
9560e94
solve pylint
cghielmini Jan 12, 2026
33bf265
first draft new version fof-compare
cghielmini Jan 19, 2026
704c0bd
make fof-compare more similar to check
cghielmini Jan 22, 2026
5c67751
log file for error
cghielmini Jan 26, 2026
17e6256
correct path
cghielmini Jan 26, 2026
8c276bb
in progress
cghielmini Jan 26, 2026
84fac35
adapt to ekf
cghielmini Jan 27, 2026
c320c83
make code more efficient and clean
cghielmini Jan 27, 2026
ec729f6
add log file for tolerance
cghielmini Jan 27, 2026
ea37cfd
write log tolerance
cghielmini Jan 27, 2026
b9ab769
Merge branch 'main' into adapt_to_ekf
cghielmini Jan 27, 2026
1c6d5da
change way to write log tolerance
cghielmini Jan 27, 2026
85b9c34
change text write_tolerance_log
cghielmini Jan 27, 2026
60b187a
clean create_tolerance_csv
cghielmini Jan 28, 2026
b554bc8
clean log file creation
cghielmini Jan 29, 2026
d8e1805
fof types names and two loggers
cghielmini Jan 29, 2026
9166d42
differentiate better between detailed and normal logger
cghielmini Jan 30, 2026
f22df50
solved tolerance problem
cghielmini Jan 30, 2026
f488e44
allow multiple log files
cghielmini Jan 30, 2026
c8b1d89
Merge remote-tracking branch 'origin/main' into adapt_to_ekf
cghielmini Jan 30, 2026
082c9eb
function for names
cghielmini Jan 30, 2026
1641123
add descriptions to functions and polish
cghielmini Jan 30, 2026
70f4b3b
fix tests
cghielmini Jan 30, 2026
3461953
ready for review
cghielmini Jan 30, 2026
360e127
ready for review for real
cghielmini Jan 30, 2026
a6fb9a7
correct pylint
cghielmini Feb 3, 2026
535cfb6
first part of suggestions
cghielmini Mar 2, 2026
5d4ec20
integration all suggestions
cghielmini Mar 2, 2026
91a161e
Update engine/fof_compare.py
cghielmini Mar 3, 2026
6aaf681
add rules and temp directory
cghielmini Mar 3, 2026
abbdc3c
Merge branch 'main' into adapt_to_ekf
cghielmini Mar 4, 2026
3123839
add test for fof-compare
cghielmini Mar 4, 2026
96f0fe5
cleaning test
cghielmini Mar 4, 2026
88d8f31
make fof_type mandatory and add help for file path 1 and 2
cghielmini Mar 5, 2026
9428b08
clean
cghielmini Mar 5, 2026
096eef7
clean commented lines
cghielmini Mar 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 65 additions & 62 deletions engine/fof_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,84 +6,87 @@
Veri data are not considered, only reports and observations are compared.
"""

import tempfile

import click
import pandas as pd
import xarray as xr

from util.click_util import CommaSeparatedStrings, cli_help
from util.dataframe_ops import check_file_with_tolerances
from util.fof_utils import (
compare_var_and_attr_ds,
primary_check,
split_feedback_dataset,
get_log_file_name,
)
from util.log_handler import initialize_detailed_logger, logger
from util.utils import FileInfo


@click.command()
@click.argument("file1", type=click.Path(exists=True))
@click.argument("file2", type=click.Path(exists=True))
@click.option(
"--print-lines",
is_flag=True,
help="Prints the lines where there are differences. "
"If --lines is not specified, then the first 10 "
"differences per variables are shown.",
"--file1",
required=True,
help="Path to the file 1; it must contain the {fof_type} placeholder.",
)
@click.option(
"--lines",
"-n",
default=10,
help="Option to specify how many lines to print " "with the --print-lines option",
"--file2",
required=True,
help="Path to the file 2; it must contain the {fof_type} placeholder.",
)
@click.option(
"--output",
"-o",
is_flag=True,
help="Option to save differences in a CSV file. "
"If the location is not specified, the file "
"is saved in the same location as this code. ",
"--fof-types",
type=CommaSeparatedStrings(),
required=True,
help=cli_help["fof_types"],
)
@click.option(
"--location",
"-l",
default=None,
help="If specified, location where to save the CSV file with the differences.",
"--tolerance",
default=1e-12,
)
def fof_compare(
file1, file2, print_lines, lines, output, location
): # pylint: disable=too-many-positional-arguments

if not primary_check(file1, file2):
print("Different types of files")
return

ds1 = xr.open_dataset(file1)
ds2 = xr.open_dataset(file2)

ds_reports1_sorted, ds_obs1_sorted = split_feedback_dataset(ds1)
ds_reports2_sorted, ds_obs2_sorted = split_feedback_dataset(ds2)

total_elements_all, equal_elements_all = 0, 0

if print_lines:
nl = lines
else:
nl = 0

for ds1, ds2 in [
(ds_reports1_sorted, ds_reports2_sorted),
(ds_obs1_sorted, ds_obs2_sorted),
]:
t, e = compare_var_and_attr_ds(ds1, ds2, nl, output, location)
total_elements_all += t
equal_elements_all += e

if total_elements_all > 0:
percent_equal_all = (equal_elements_all / total_elements_all) * 100
percent_diff_all = 100 - percent_equal_all
print(f"Total percentage of equality: {percent_equal_all:.2f}%")
print(f"Total percentage of difference: {percent_diff_all:.2f}%")
if equal_elements_all == total_elements_all:
print("Files are consistent!")
else:
print("Files are NOT consistent!")
@click.option("--rules", default="")
def fof_compare(file1, file2, fof_types, tolerance, rules):

for fof_type in fof_types:
file1_path = file1.format(fof_type=fof_type)
file2_path = file2.format(fof_type=fof_type)

n_rows_file1 = xr.open_dataset(file1_path).sizes["d_body"]
n_rows_file2 = xr.open_dataset(file2_path).sizes["d_body"]

if n_rows_file1 != n_rows_file2:
raise ValueError("Files have different numbers of lines!")

with tempfile.NamedTemporaryFile(
mode="w", suffix=".csv", delete=True, dir="/dev/shm"
) as tmp:
df = pd.DataFrame({"tolerance": [tolerance] * n_rows_file1})
df.to_csv(tmp.name)

out, err, tol = check_file_with_tolerances(
tmp.name,
FileInfo(file1_path),
FileInfo(file2_path),
factor=1,
rules=rules,
)

if out:
logger.info("Files are consistent!")

else:
logger.info("Files are NOT consistent!")

log_file_name = get_log_file_name(file1_path)
logger.info("Complete output available in %s", log_file_name)
if not err.empty:
detailed_logger = initialize_detailed_logger(
"DETAILS", log_level="DEBUG", log_file=log_file_name
)

detailed_logger.info(
"Differences, veri_data outside of tolerance range"
)
detailed_logger.info(err)
detailed_logger.info(tol)


if __name__ == "__main__":
Expand Down
135 changes: 135 additions & 0 deletions tests/engine/test_fof_compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""
This module contains test cases to validate the functionality
of fof-compare CLI commands.
"""

import logging
import os
from pathlib import Path

import pytest
from click.testing import CliRunner

from engine.fof_compare import fof_compare


@pytest.fixture(name="fof_datasets", scope="function")
def fixture_fof_datasets(fof_datasets_base, tmp_dir):
    """
    Write the FOF datasets to disk and yield their file paths.

    The third dataset is a perturbed deep copy of the second (its ``flags``
    variable is scaled by 1.55), so tests have one pair of matching files
    and one pair that differs.
    """
    base_ds, ref_ds, _, _ = fof_datasets_base
    perturbed_ds = ref_ds.copy(deep=True)
    perturbed_ds["flags"] = (("d_body",), perturbed_ds["flags"].values * 1.55)

    paths = tuple(os.path.join(tmp_dir, f"fof{i}_SYNOP.nc") for i in (1, 2, 3))
    for dataset, path in zip((base_ds, ref_ds, perturbed_ds), paths):
        dataset.to_netcdf(path)

    yield paths


def test_fof_compare_works(fof_datasets, tmp_dir, monkeypatch):
    """
    Test that fof-compare runs successfully and produces a log file.

    The dataset file names contain the concrete type ``SYNOP``; the CLI
    expects a ``{fof_type}`` placeholder in both paths, so it is substituted
    back in before invoking the command.
    """
    # The fixture yields file paths (strings), not DataFrames.
    file1_tpl, file2_tpl, _ = fof_datasets

    file1_tpl = file1_tpl.replace("SYNOP", "{fof_type}")
    file2_tpl = file2_tpl.replace("SYNOP", "{fof_type}")
    monkeypatch.chdir(tmp_dir)
    rules = ""
    runner = CliRunner()

    result = runner.invoke(
        fof_compare,
        [
            "--file1",
            file1_tpl,
            "--file2",
            file2_tpl,
            "--fof-types",
            "SYNOP",
            "--tolerance",
            "1e-12",
            "--rules",
            rules,
        ],
    )

    # Include the captured CLI output in the failure message for easier debugging.
    assert result.exit_code == 0, result.output

    # The command is expected to write its detailed log next to file1.
    log_file = Path(tmp_dir) / "error_fof1_SYNOP.log"

    assert log_file.exists()


def test_fof_compare_not_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
    """
    Test that if there are differences in the files, then fof-compare writes
    in the log file that the files are not consistent.
    """
    path1, _, path3 = fof_datasets

    # The CLI expects the {fof_type} placeholder in both file paths.
    cli_args = [
        "--file1",
        path1.replace("SYNOP", "{fof_type}"),
        "--file2",
        path3.replace("SYNOP", "{fof_type}"),
        "--fof-types",
        "SYNOP",
        "--tolerance",
        "5",
        "--rules",
        "",
    ]
    monkeypatch.chdir(tmp_dir)

    with caplog.at_level(logging.INFO):
        CliRunner().invoke(fof_compare, cli_args)

    assert "Files are NOT consistent!" in caplog.text


def test_fof_compare_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
    """
    Test that if there are no differences in the files and the tolerance is big
    enough, then fof-compare writes in the log file that the files are consistent.
    """
    path1, path2, _ = fof_datasets

    # The CLI expects the {fof_type} placeholder in both file paths.
    cli_args = [
        "--file1",
        path1.replace("SYNOP", "{fof_type}"),
        "--file2",
        path2.replace("SYNOP", "{fof_type}"),
        "--fof-types",
        "SYNOP",
        "--tolerance",
        "5",
        "--rules",
        "",
    ]
    monkeypatch.chdir(tmp_dir)

    with caplog.at_level(logging.INFO):
        CliRunner().invoke(fof_compare, cli_args)

    assert "Files are consistent!" in caplog.text
33 changes: 17 additions & 16 deletions tests/util/test_dataframe_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from unittest.mock import patch
from unittest.mock import mock_open, patch

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -843,8 +843,12 @@ def test_multiple_solutions_from_dict_no_rules(dataframes_dict):
dict_cur = {key: df.copy() for key, df in dict_ref.items()}
rules = ""

errors = check_multiple_solutions_from_dict(dict_ref, dict_cur, rules)
assert errors == []
with patch("builtins.open", mock_open()):
errors = check_multiple_solutions_from_dict(
dict_ref, dict_cur, rules, log_file_name="file_name.log"
)

assert errors is False


def test_multiple_solutions_from_dict_with_rules(dataframes_dict):
Expand All @@ -855,8 +859,11 @@ def test_multiple_solutions_from_dict_with_rules(dataframes_dict):

rules = {"check": [9, 1], "state": [13, 14]}

errors = check_multiple_solutions_from_dict(dict_ref, dict_cur, rules)
assert errors == []
with patch("builtins.open", mock_open()):
errors = check_multiple_solutions_from_dict(
dict_ref, dict_cur, rules, log_file_name="file_name.log"
)
assert errors is False


def test_multiple_solutions_from_dict_with_rules_wrong(dataframes_dict):
Expand All @@ -867,15 +874,9 @@ def test_multiple_solutions_from_dict_with_rules_wrong(dataframes_dict):

rules = {"check": [9, 1], "state": [13, 14]}

errors = check_multiple_solutions_from_dict(dict_ref, dict_cur, rules)
with patch("builtins.open", mock_open()):
errors = check_multiple_solutions_from_dict(
dict_ref, dict_cur, rules, log_file_name="file_name.log"
)

expected = [
{
"row": 1,
"column": "check",
"file1": np.int64(9),
"file2": np.int64(6),
"error": "values different and not admitted",
}
]
assert errors == expected
assert errors is True
Loading