Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
b3798e0
report and observations in check
cghielmini Dec 1, 2025
b255f0e
improved version of extended check
cghielmini Dec 1, 2025
aac573d
first version of including veri_data in fof-compare
cghielmini Dec 3, 2025
a08b99b
adapt probtest to ekf
cghielmini Dec 12, 2025
357534d
first complete version of this PR
cghielmini Dec 12, 2025
d053bcf
rebasing
cghielmini Jan 8, 2026
43344d3
solve tests failing
cghielmini Jan 8, 2026
e4faee0
integration first part of comments
cghielmini Jan 12, 2026
17f49ac
improve write_differences function
cghielmini Jan 12, 2026
9560e94
solve pylint
cghielmini Jan 12, 2026
33bf265
first draft new version fof-compare
cghielmini Jan 19, 2026
704c0bd
make fof-compare more similar to check
cghielmini Jan 22, 2026
5c67751
log file for error
cghielmini Jan 26, 2026
17e6256
correct path
cghielmini Jan 26, 2026
8c276bb
in progress
cghielmini Jan 26, 2026
84fac35
adapt to ekf
cghielmini Jan 27, 2026
c320c83
make code more efficient and clean
cghielmini Jan 27, 2026
ec729f6
add log file for tolerance
cghielmini Jan 27, 2026
ea37cfd
write log tolerance
cghielmini Jan 27, 2026
b9ab769
Merge branch 'main' into adapt_to_ekf
cghielmini Jan 27, 2026
1c6d5da
change way to write log tolerance
cghielmini Jan 27, 2026
85b9c34
change text write_tolerance_log
cghielmini Jan 27, 2026
60b187a
clean create_tolerance_csv
cghielmini Jan 28, 2026
b554bc8
clean log file creation
cghielmini Jan 29, 2026
d8e1805
fof types names and two loggers
cghielmini Jan 29, 2026
9166d42
differentiate better between detailed and normal logger
cghielmini Jan 30, 2026
f22df50
solved tolerance problem
cghielmini Jan 30, 2026
f488e44
allow multiple log files
cghielmini Jan 30, 2026
c8b1d89
Merge remote-tracking branch 'origin/main' into adapt_to_ekf
cghielmini Jan 30, 2026
082c9eb
function for names
cghielmini Jan 30, 2026
1641123
add descriptions to functions and polish
cghielmini Jan 30, 2026
70f4b3b
fix tests
cghielmini Jan 30, 2026
3461953
ready for review
cghielmini Jan 30, 2026
360e127
ready for review for real
cghielmini Jan 30, 2026
a6fb9a7
correct pylint
cghielmini Feb 3, 2026
535cfb6
first part of suggestions
cghielmini Mar 2, 2026
5d4ec20
integration all suggestions
cghielmini Mar 2, 2026
91a161e
Update engine/fof_compare.py
cghielmini Mar 3, 2026
6aaf681
add rules and temp directory
cghielmini Mar 3, 2026
abbdc3c
Merge branch 'main' into adapt_to_ekf
cghielmini Mar 4, 2026
3123839
add test for fof-compare
cghielmini Mar 4, 2026
96f0fe5
cleaning test
cghielmini Mar 4, 2026
88d8f31
make fof_type mandatory and add help for file path 1 and 2
cghielmini Mar 5, 2026
9428b08
clean
cghielmini Mar 5, 2026
096eef7
clean commented lines
cghielmini Mar 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 65 additions & 62 deletions engine/fof_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,84 +6,87 @@
Veri data are not considered, only reports and observations are compared.
"""

import tempfile

import click
import pandas as pd
import xarray as xr

from util.click_util import CommaSeparatedStrings, cli_help
from util.dataframe_ops import check_file_with_tolerances
from util.fof_utils import (
compare_var_and_attr_ds,
primary_check,
split_feedback_dataset,
get_log_file_name,
)
from util.log_handler import initialize_detailed_logger, logger
from util.utils import FileInfo


@click.command()
@click.argument("file1", type=click.Path(exists=True))
@click.argument("file2", type=click.Path(exists=True))
@click.option(
"--print-lines",
is_flag=True,
help="Prints the lines where there are differences. "
"If --lines is not specified, then the first 10 "
"differences per variables are shown.",
"--file1",
required=True,
help="Path to the file 1; it must contain the {fof_type} placeholder.",
)
@click.option(
"--lines",
"-n",
default=10,
help="Option to specify how many lines to print " "with the --print-lines option",
"--file2",
required=True,
help="Path to the file 2; it must contain the {fof_type} placeholder.",
)
@click.option(
"--output",
"-o",
is_flag=True,
help="Option to save differences in a CSV file. "
"If the location is not specified, the file "
"is saved in the same location as this code. ",
"--fof-types",
type=CommaSeparatedStrings(),
required=True,
help=cli_help["fof_types"],
)
@click.option(
"--location",
"-l",
default=None,
help="If specified, location where to save the CSV file with the differences.",
"--tolerance",
default=1e-12,
)
def fof_compare(
file1, file2, print_lines, lines, output, location
): # pylint: disable=too-many-positional-arguments

if not primary_check(file1, file2):
print("Different types of files")
return

ds1 = xr.open_dataset(file1)
ds2 = xr.open_dataset(file2)

ds_reports1_sorted, ds_obs1_sorted = split_feedback_dataset(ds1)
ds_reports2_sorted, ds_obs2_sorted = split_feedback_dataset(ds2)

total_elements_all, equal_elements_all = 0, 0

if print_lines:
nl = lines
else:
nl = 0

for ds1, ds2 in [
(ds_reports1_sorted, ds_reports2_sorted),
(ds_obs1_sorted, ds_obs2_sorted),
]:
t, e = compare_var_and_attr_ds(ds1, ds2, nl, output, location)
total_elements_all += t
equal_elements_all += e

if total_elements_all > 0:
percent_equal_all = (equal_elements_all / total_elements_all) * 100
percent_diff_all = 100 - percent_equal_all
print(f"Total percentage of equality: {percent_equal_all:.2f}%")
print(f"Total percentage of difference: {percent_diff_all:.2f}%")
if equal_elements_all == total_elements_all:
print("Files are consistent!")
else:
print("Files are NOT consistent!")
@click.option("--rules", default="")
def fof_compare(file1, file2, fof_types, tolerance, rules):

for fof_type in fof_types:
file1_path = file1.format(fof_type=fof_type)
file2_path = file2.format(fof_type=fof_type)

n_rows_file1 = xr.open_dataset(file1_path).sizes["d_body"]
n_rows_file2 = xr.open_dataset(file2_path).sizes["d_body"]

if n_rows_file1 != n_rows_file2:
raise ValueError("Files have different numbers of lines!")

with tempfile.NamedTemporaryFile(
mode="w", suffix=".csv", delete=True, dir="/dev/shm"
) as tmp:
df = pd.DataFrame({"tolerance": [tolerance] * n_rows_file1})
df.to_csv(tmp.name)

out, err, tol = check_file_with_tolerances(
tmp.name,
FileInfo(file1_path),
FileInfo(file2_path),
factor=1,
rules=rules,
)

if out:
logger.info("Files are consistent!")

else:
logger.info("Files are NOT consistent!")

log_file_name = get_log_file_name(file1_path)
logger.info("Complete output available in %s", log_file_name)
if not err.empty:
detailed_logger = initialize_detailed_logger(
"DETAILS", log_level="DEBUG", log_file=log_file_name
)

detailed_logger.info(
"Differences, veri_data outside of tolerance range"
)
detailed_logger.info(err)
detailed_logger.info(tol)


if __name__ == "__main__":
Expand Down
135 changes: 135 additions & 0 deletions tests/engine/test_fof_compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""
This module contains test cases to validate the functionality
of fof-compare CLI commands.
"""

import logging
import os
from pathlib import Path

import pytest
from click.testing import CliRunner

from engine.fof_compare import fof_compare


@pytest.fixture(name="fof_datasets", scope="function")
def fixture_fof_datasets(fof_datasets_base, tmp_dir):
    """
    Write the FOF datasets to disk and yield their file paths.

    The third dataset is a perturbed deep copy of the second (its ``flags``
    variable is scaled by 1.55), so tests have one pair of matching files
    and one pair that differs.
    """
    base_ds, ref_ds, _, _ = fof_datasets_base
    perturbed_ds = ref_ds.copy(deep=True)
    perturbed_ds["flags"] = (("d_body",), perturbed_ds["flags"].values * 1.55)

    paths = tuple(os.path.join(tmp_dir, f"fof{i}_SYNOP.nc") for i in (1, 2, 3))
    for dataset, path in zip((base_ds, ref_ds, perturbed_ds), paths):
        dataset.to_netcdf(path)

    yield paths


def test_fof_compare_works(fof_datasets, tmp_dir, monkeypatch):
    """
    Test that fof-compare runs successfully and produces a log file.

    The dataset file names contain the concrete type ``SYNOP``; the CLI
    expects a ``{fof_type}`` placeholder in both paths, so it is substituted
    back in before invoking the command.
    """
    # The fixture yields file paths (strings), not DataFrames.
    file1_tpl, file2_tpl, _ = fof_datasets

    file1_tpl = file1_tpl.replace("SYNOP", "{fof_type}")
    file2_tpl = file2_tpl.replace("SYNOP", "{fof_type}")
    monkeypatch.chdir(tmp_dir)
    rules = ""
    runner = CliRunner()

    result = runner.invoke(
        fof_compare,
        [
            "--file1",
            file1_tpl,
            "--file2",
            file2_tpl,
            "--fof-types",
            "SYNOP",
            "--tolerance",
            "1e-12",
            "--rules",
            rules,
        ],
    )

    # Include the captured CLI output in the failure message for easier debugging.
    assert result.exit_code == 0, result.output

    # The command is expected to write its detailed log next to file1.
    log_file = Path(tmp_dir) / "error_fof1_SYNOP.log"

    assert log_file.exists()


def test_fof_compare_not_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
    """
    Test that if there are differences in the files, then fof-compare writes
    in the log file that the files are not consistent.
    """
    path1, _, path3 = fof_datasets

    # The CLI expects the {fof_type} placeholder in both file paths.
    cli_args = [
        "--file1",
        path1.replace("SYNOP", "{fof_type}"),
        "--file2",
        path3.replace("SYNOP", "{fof_type}"),
        "--fof-types",
        "SYNOP",
        "--tolerance",
        "5",
        "--rules",
        "",
    ]
    monkeypatch.chdir(tmp_dir)

    with caplog.at_level(logging.INFO):
        CliRunner().invoke(fof_compare, cli_args)

    assert "Files are NOT consistent!" in caplog.text


def test_fof_compare_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
    """
    Test that if there are no differences in the files and the tolerance is big
    enough, then fof-compare writes in the log file that the files are consistent.
    """
    path1, path2, _ = fof_datasets

    # The CLI expects the {fof_type} placeholder in both file paths.
    cli_args = [
        "--file1",
        path1.replace("SYNOP", "{fof_type}"),
        "--file2",
        path2.replace("SYNOP", "{fof_type}"),
        "--fof-types",
        "SYNOP",
        "--tolerance",
        "5",
        "--rules",
        "",
    ]
    monkeypatch.chdir(tmp_dir)

    with caplog.at_level(logging.INFO):
        CliRunner().invoke(fof_compare, cli_args)

    assert "Files are consistent!" in caplog.text
33 changes: 17 additions & 16 deletions tests/util/test_dataframe_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from unittest.mock import patch
from unittest.mock import mock_open, patch

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -843,8 +843,12 @@ def test_multiple_solutions_from_dict_no_rules(dataframes_dict):
dict_cur = {key: df.copy() for key, df in dict_ref.items()}
rules = ""

errors = check_multiple_solutions_from_dict(dict_ref, dict_cur, rules)
assert errors == []
with patch("builtins.open", mock_open()):
errors = check_multiple_solutions_from_dict(
dict_ref, dict_cur, rules, log_file_name="file_name.log"
)

assert errors is False


def test_multiple_solutions_from_dict_with_rules(dataframes_dict):
Expand All @@ -855,8 +859,11 @@ def test_multiple_solutions_from_dict_with_rules(dataframes_dict):

rules = {"check": [9, 1], "state": [13, 14]}

errors = check_multiple_solutions_from_dict(dict_ref, dict_cur, rules)
assert errors == []
with patch("builtins.open", mock_open()):
errors = check_multiple_solutions_from_dict(
dict_ref, dict_cur, rules, log_file_name="file_name.log"
)
assert errors is False


def test_multiple_solutions_from_dict_with_rules_wrong(dataframes_dict):
Expand All @@ -867,15 +874,9 @@ def test_multiple_solutions_from_dict_with_rules_wrong(dataframes_dict):

rules = {"check": [9, 1], "state": [13, 14]}

errors = check_multiple_solutions_from_dict(dict_ref, dict_cur, rules)
with patch("builtins.open", mock_open()):
errors = check_multiple_solutions_from_dict(
dict_ref, dict_cur, rules, log_file_name="file_name.log"
)

expected = [
{
"row": 1,
"column": "check",
"file1": np.int64(9),
"file2": np.int64(6),
"error": "values different and not admitted",
}
]
assert errors == expected
assert errors is True
Loading