diff --git a/changes/300.fix b/changes/300.fix new file mode 100644 index 0000000..0750380 --- /dev/null +++ b/changes/300.fix @@ -0,0 +1 @@ +Align CLI and HTML score color thresholds to eliminate the green/yellow inconsistency at the 0.80–0.85 boundary. ``color_for_score()`` in ``cli_summary.py`` now reads from the shared ``ZONE_THRESHOLDS`` constant (green ≥ 0.85) instead of a hard-coded 0.80 cutoff, matching the HTML report's coloring exactly. diff --git a/src/raki/report/_thresholds.py b/src/raki/report/_thresholds.py new file mode 100644 index 0000000..4c29881 --- /dev/null +++ b/src/raki/report/_thresholds.py @@ -0,0 +1,17 @@ +"""Shared zone threshold constants — single source of truth for all color-zone logic. + +Both ``html_report.py`` and ``cli_summary.py`` import from this module so that +CLI and HTML color functions always apply identical thresholds. +""" + +from __future__ import annotations + +# Central zone threshold config — single source of truth for all color-zone logic. +ZONE_THRESHOLDS: dict[str, dict[str, float]] = { + # 0-1 score metrics: green >= 0.85, amber 0.60 to <0.85, red < 0.60 + "score": {"green": 0.85, "amber": 0.60}, + # Cost (currency) metrics: green <= $5, amber $5-$15, red > $15 + "cost": {"green": 5.0, "amber": 15.0}, + # Rework cycles: green <= 0.3, amber 0.3-1.0, red > 1.0 + "rework_cycles": {"green": 0.3, "amber": 1.0}, +} diff --git a/src/raki/report/cli_summary.py b/src/raki/report/cli_summary.py index 9c7a0d6..3053c4b 100644 --- a/src/raki/report/cli_summary.py +++ b/src/raki/report/cli_summary.py @@ -10,6 +10,7 @@ from raki.metrics.protocol import Metric from raki.model.report import EvalReport +from raki.report._thresholds import ZONE_THRESHOLDS if TYPE_CHECKING: from raki.report.cohort import CohortSummary @@ -76,15 +77,16 @@ def color_for_score( ) -> str: """Color-code a score value. + Thresholds sourced from ZONE_THRESHOLDS: green >= 0.85, amber 0.60 to <0.85, red < 0.60.
Skip color for non-ratio metrics (currency, count) where higher_is_better is False -- those values are not on a 0-1 scale. """ if not higher_is_better and display_format in ("currency", "count", "duration"): return "white" if higher_is_better: - if score >= 0.8: + if score >= ZONE_THRESHOLDS["score"]["green"]: return "green" - if score >= 0.6: + if score >= ZONE_THRESHOLDS["score"]["amber"]: return "yellow" return "red" else: diff --git a/src/raki/report/html_report.py b/src/raki/report/html_report.py index d8923dd..09974c4 100644 --- a/src/raki/report/html_report.py +++ b/src/raki/report/html_report.py @@ -10,6 +10,7 @@ from raki.model.dataset import EvalSample from raki.model.phases import PhaseResult from raki.model.report import EvalReport, SampleResult +from raki.report._thresholds import ZONE_THRESHOLDS from raki.report.cli_summary import ( EXPERIMENTAL_METRICS, KNOWLEDGE_METRICS, @@ -22,16 +23,6 @@ from raki.report.diff import DiffReport from raki.report.sparkline import SparklineData -# Central zone threshold config — single source of truth for all color-zone logic. -ZONE_THRESHOLDS: dict[str, dict[str, float]] = { - # 0-1 score metrics: green >= 0.85, amber 0.60-0.84, red < 0.60 - "score": {"green": 0.85, "amber": 0.60}, - # Cost (currency) metrics: green <= $5, amber $5-$15, red > $15 - "cost": {"green": 5.0, "amber": 15.0}, - # Rework cycles: green <= 0.3, amber 0.3-1.0, red > 1.0 - "rework_cycles": {"green": 0.3, "amber": 1.0}, -} - # Metric metadata registry — maps raw metric names to display properties. 
# This mirrors the class-level attributes from each Metric implementation # so the HTML report can render display_name, format values, and pick colors diff --git a/tests/test_report.py b/tests/test_report.py index e47e7c7..1b57dcc 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -1745,3 +1745,99 @@ def test_no_indicators_without_sparklines(self) -> None: # No sparklines → no delta indicators assert "▲" not in output assert "▼" not in output + + +# --------------------------------------------------------------------------- +# Task 1: ZONE_THRESHOLDS shared module (ticket #300) +# --------------------------------------------------------------------------- + + +class TestZoneThresholdsModule: + """ZONE_THRESHOLDS must be importable from raki.report._thresholds.""" + + def test_zone_thresholds_importable(self) -> None: + """_thresholds module must export ZONE_THRESHOLDS.""" + from raki.report._thresholds import ZONE_THRESHOLDS + + assert isinstance(ZONE_THRESHOLDS, dict) + + def test_zone_thresholds_has_score_key(self) -> None: + """ZONE_THRESHOLDS must contain a 'score' key with green/amber sub-keys.""" + from raki.report._thresholds import ZONE_THRESHOLDS + + assert "score" in ZONE_THRESHOLDS + assert "green" in ZONE_THRESHOLDS["score"] + assert "amber" in ZONE_THRESHOLDS["score"] + + def test_zone_thresholds_score_green_is_085(self) -> None: + """ZONE_THRESHOLDS['score']['green'] must be 0.85 (not 0.80).""" + from raki.report._thresholds import ZONE_THRESHOLDS + + assert ZONE_THRESHOLDS["score"]["green"] == 0.85 + + def test_zone_thresholds_has_cost_key(self) -> None: + """ZONE_THRESHOLDS must contain a 'cost' key.""" + from raki.report._thresholds import ZONE_THRESHOLDS + + assert "cost" in ZONE_THRESHOLDS + + def test_zone_thresholds_has_rework_cycles_key(self) -> None: + """ZONE_THRESHOLDS must contain a 'rework_cycles' key.""" + from raki.report._thresholds import ZONE_THRESHOLDS + + assert "rework_cycles" in ZONE_THRESHOLDS + + +# 
--------------------------------------------------------------------------- +# Task 2: html_report imports ZONE_THRESHOLDS from _thresholds (ticket #300) +# --------------------------------------------------------------------------- + + +class TestHtmlReportUsesSharedThresholds: + """html_report.ZONE_THRESHOLDS must be the same object as _thresholds.ZONE_THRESHOLDS.""" + + def test_html_report_zone_thresholds_is_shared_object(self) -> None: + """html_report.ZONE_THRESHOLDS must be imported from _thresholds (same object).""" + from raki.report import html_report + from raki.report._thresholds import ZONE_THRESHOLDS + + assert html_report.ZONE_THRESHOLDS is ZONE_THRESHOLDS + + def test_html_report_zone_thresholds_score_green_is_085(self) -> None: + """html_report.ZONE_THRESHOLDS['score']['green'] must be 0.85 after import.""" + from raki.report.html_report import ZONE_THRESHOLDS + + assert ZONE_THRESHOLDS["score"]["green"] == 0.85 + + +# --------------------------------------------------------------------------- +# Task 3: color_for_score() uses shared thresholds at 0.85 boundary (ticket #300) +# --------------------------------------------------------------------------- + + +class TestColorForScoreAlignedThresholds: + """color_for_score() must use ZONE_THRESHOLDS['score']['green'] = 0.85, not 0.80.""" + + def test_score_082_is_yellow_not_green(self) -> None: + """0.82 is below 0.85 threshold — must return 'yellow', not 'green'.""" + assert color_for_score(0.82, higher_is_better=True) == "yellow" + + def test_score_084_is_yellow_not_green(self) -> None: + """0.84 is below 0.85 threshold — must return 'yellow', not 'green'.""" + assert color_for_score(0.84, higher_is_better=True) == "yellow" + + def test_score_085_is_green(self) -> None: + """0.85 is exactly at the threshold — must return 'green'.""" + assert color_for_score(0.85, higher_is_better=True) == "green" + + def test_score_086_is_green(self) -> None: + """0.86 is above the threshold — must return 'green'.""" 
+ assert color_for_score(0.86, higher_is_better=True) == "green" + + def test_score_060_is_yellow(self) -> None: + """0.60 is at amber lower boundary — must return 'yellow'.""" + assert color_for_score(0.60, higher_is_better=True) == "yellow" + + def test_score_059_is_red(self) -> None: + """0.59 is below amber — must return 'red'.""" + assert color_for_score(0.59, higher_is_better=True) == "red" diff --git a/tests/test_report_html.py b/tests/test_report_html.py index cb41e7a..1eef5b2 100644 --- a/tests/test_report_html.py +++ b/tests/test_report_html.py @@ -4508,3 +4508,82 @@ def test_degraded_gracefully_without_sparklines(self, tmp_path: Path) -> None: assert "score-card" in content # No SVG polyline (sparklines not provided) assert " None: + """CLI color_for_score must return expected color at each boundary value.""" + from raki.report.cli_summary import color_for_score + + assert color_for_score(score, higher_is_better=True) == expected_color, ( + f"CLI color_for_score({score}) == {color_for_score(score, True)!r}, " + f"expected {expected_color!r}" + ) + + @pytest.mark.parametrize("score,expected_color", SCORE_CASES) + def test_html_color_matches_expected(self, score: float, expected_color: str) -> None: + """HTML html_color_for_score must return expected color at each boundary value.""" + from raki.report.html_report import html_color_for_score + + assert html_color_for_score(score, higher_is_better=True) == expected_color, ( + f"HTML html_color_for_score({score}) == " + f"{html_color_for_score(score, True)!r}, expected {expected_color!r}" + ) + + @pytest.mark.parametrize("score,_expected_color", SCORE_CASES) + def test_cli_and_html_return_same_color(self, score: float, _expected_color: str) -> None: + """CLI and HTML functions must agree on color for every boundary score value.""" + from raki.report.cli_summary import color_for_score + from raki.report.html_report import html_color_for_score + + cli_color = color_for_score(score, higher_is_better=True) + 
html_color = html_color_for_score(score, higher_is_better=True) + assert cli_color == html_color, ( + f"Color mismatch at score={score}: CLI={cli_color!r}, HTML={html_color!r}" + ) + + def test_both_use_same_green_threshold_constant(self) -> None: + """The shared ZONE_THRESHOLDS green threshold for scores must be 0.85.""" + from raki.report._thresholds import ZONE_THRESHOLDS + + green_threshold = ZONE_THRESHOLDS["score"]["green"] + assert green_threshold == 0.85, ( + f"ZONE_THRESHOLDS['score']['green'] must be 0.85, got {green_threshold}" + ) + + def test_score_just_below_green_threshold_is_yellow(self) -> None: + """A score 0.001 below the green threshold must be yellow on both CLI and HTML.""" + from raki.report._thresholds import ZONE_THRESHOLDS + from raki.report.cli_summary import color_for_score + from raki.report.html_report import html_color_for_score + + just_below = ZONE_THRESHOLDS["score"]["green"] - 0.001 + assert color_for_score(just_below, higher_is_better=True) == "yellow" + assert html_color_for_score(just_below, higher_is_better=True) == "yellow"