decko · decko · May 26, 2026 · May 26, 2026 · May 26, 2026 · May 26, 2026
diff --git a/changes/300.fix b/changes/300.fix
@@ -0,0 +1 @@
+Align CLI and HTML score color thresholds to eliminate the green/yellow inconsistency for scores in the 0.80–0.85 range. ``color_for_score()`` in ``cli_summary.py`` now reads from the shared ``ZONE_THRESHOLDS`` constant (green ≥ 0.85) instead of a hard-coded 0.80 cutoff, matching the HTML report's coloring exactly.
diff --git a/src/raki/report/_thresholds.py b/src/raki/report/_thresholds.py
@@ -0,0 +1,17 @@
+"""Shared zone threshold constants — single source of truth for all color-zone logic.
+
+Both ``html_report.py`` and ``cli_summary.py`` import from this module so that
+CLI and HTML color functions always apply identical thresholds.
+"""
+
+from __future__ import annotations
+
+# Central zone threshold config — single source of truth for all color-zone logic.
+ZONE_THRESHOLDS: dict[str, dict[str, float]] = {
+    # 0-1 score metrics: green >= 0.85, amber >= 0.60 and < 0.85, red < 0.60
+    "score": {"green": 0.85, "amber": 0.60},
+    # Cost (currency) metrics: green <= $5, amber $5-$15, red > $15
+    "cost": {"green": 5.0, "amber": 15.0},
+    # Rework cycles: green <= 0.3, amber 0.3-1.0, red > 1.0
+    "rework_cycles": {"green": 0.3, "amber": 1.0},
+}
diff --git a/src/raki/report/cli_summary.py b/src/raki/report/cli_summary.py
@@ -10,6 +10,7 @@
 
 from raki.metrics.protocol import Metric
 from raki.model.report import EvalReport
+from raki.report._thresholds import ZONE_THRESHOLDS
 
 if TYPE_CHECKING:
     from raki.report.cohort import CohortSummary
@@ -76,15 +77,16 @@ def color_for_score(
 ) -> str:
     """Color-code a score value.
 
+    Higher-is-better cutoffs (ZONE_THRESHOLDS["score"]): green >= 0.85, yellow >= 0.60, else red.
     Skip color for non-ratio metrics (currency, count) where higher_is_better
     is False -- those values are not on a 0-1 scale.
     """
     if not higher_is_better and display_format in ("currency", "count", "duration"):
         return "white"
     if higher_is_better:
-        if score >= 0.8:
+        if score >= ZONE_THRESHOLDS["score"]["green"]:
             return "green"
-        if score >= 0.6:
+        if score >= ZONE_THRESHOLDS["score"]["amber"]:
             return "yellow"
         return "red"
     else:

diff --git a/src/raki/report/html_report.py b/src/raki/report/html_report.py
@@ -10,6 +10,7 @@
 from raki.model.dataset import EvalSample
 from raki.model.phases import PhaseResult
 from raki.model.report import EvalReport, SampleResult
+from raki.report._thresholds import ZONE_THRESHOLDS
 from raki.report.cli_summary import (
     EXPERIMENTAL_METRICS,
     KNOWLEDGE_METRICS,
@@ -22,16 +23,6 @@
     from raki.report.diff import DiffReport
     from raki.report.sparkline import SparklineData
 
-# Central zone threshold config — single source of truth for all color-zone logic.
-ZONE_THRESHOLDS: dict[str, dict[str, float]] = {
-    # 0-1 score metrics: green >= 0.85, amber 0.60-0.84, red < 0.60
-    "score": {"green": 0.85, "amber": 0.60},
-    # Cost (currency) metrics: green <= $5, amber $5-$15, red > $15
-    "cost": {"green": 5.0, "amber": 15.0},
-    # Rework cycles: green <= 0.3, amber 0.3-1.0, red > 1.0
-    "rework_cycles": {"green": 0.3, "amber": 1.0},
-}
-
 # Metric metadata registry — maps raw metric names to display properties.
 # This mirrors the class-level attributes from each Metric implementation
 # so the HTML report can render display_name, format values, and pick colors

diff --git a/tests/test_report.py b/tests/test_report.py
@@ -1745,3 +1745,99 @@ def test_no_indicators_without_sparklines(self) -> None:
         # No sparklines → no delta indicators
         assert "▲" not in output
         assert "▼" not in output
+
+
+# ---------------------------------------------------------------------------
+# Task 1: ZONE_THRESHOLDS shared module (ticket #300)
+# ---------------------------------------------------------------------------
+
+
+class TestZoneThresholdsModule:
+    """ZONE_THRESHOLDS must be importable from raki.report._thresholds."""
+
+    def test_zone_thresholds_importable(self) -> None:
+        """_thresholds module must export ZONE_THRESHOLDS."""
+        from raki.report._thresholds import ZONE_THRESHOLDS
+
+        assert isinstance(ZONE_THRESHOLDS, dict)
+
+    def test_zone_thresholds_has_score_key(self) -> None:
+        """ZONE_THRESHOLDS must contain a 'score' key with green/amber sub-keys."""
+        from raki.report._thresholds import ZONE_THRESHOLDS
+
+        assert "score" in ZONE_THRESHOLDS
+        assert "green" in ZONE_THRESHOLDS["score"]
+        assert "amber" in ZONE_THRESHOLDS["score"]
+
+    def test_zone_thresholds_score_green_is_085(self) -> None:
+        """ZONE_THRESHOLDS['score']['green'] must be 0.85 (not 0.80)."""
+        from raki.report._thresholds import ZONE_THRESHOLDS
+
+        assert ZONE_THRESHOLDS["score"]["green"] == 0.85
+
+    def test_zone_thresholds_has_cost_key(self) -> None:
+        """ZONE_THRESHOLDS must contain a 'cost' key."""
+        from raki.report._thresholds import ZONE_THRESHOLDS
+
+        assert "cost" in ZONE_THRESHOLDS
+
+    def test_zone_thresholds_has_rework_cycles_key(self) -> None:
+        """ZONE_THRESHOLDS must contain a 'rework_cycles' key."""
+        from raki.report._thresholds import ZONE_THRESHOLDS
+
+        assert "rework_cycles" in ZONE_THRESHOLDS
+
+
+# ---------------------------------------------------------------------------
+# Task 2: html_report imports ZONE_THRESHOLDS from _thresholds (ticket #300)
+# ---------------------------------------------------------------------------
+
+
+class TestHtmlReportUsesSharedThresholds:
+    """html_report.ZONE_THRESHOLDS must be the same object as _thresholds.ZONE_THRESHOLDS."""
+
+    def test_html_report_zone_thresholds_is_shared_object(self) -> None:
+        """html_report.ZONE_THRESHOLDS must be imported from _thresholds (same object)."""
+        from raki.report import html_report
+        from raki.report._thresholds import ZONE_THRESHOLDS
+
+        assert html_report.ZONE_THRESHOLDS is ZONE_THRESHOLDS
+
+    def test_html_report_zone_thresholds_score_green_is_085(self) -> None:
+        """html_report.ZONE_THRESHOLDS['score']['green'] must be 0.85 after import."""
+        from raki.report.html_report import ZONE_THRESHOLDS
+
+        assert ZONE_THRESHOLDS["score"]["green"] == 0.85
+
+
+# ---------------------------------------------------------------------------
+# Task 3: color_for_score() uses shared thresholds at 0.85 boundary (ticket #300)
+# ---------------------------------------------------------------------------
+
+
+class TestColorForScoreAlignedThresholds:
+    """color_for_score() must use ZONE_THRESHOLDS['score']['green'] = 0.85, not 0.80."""
+
+    def test_score_082_is_yellow_not_green(self) -> None:
+        """0.82 is below 0.85 threshold — must return 'yellow', not 'green'."""
+        assert color_for_score(0.82, higher_is_better=True) == "yellow"
+
+    def test_score_084_is_yellow_not_green(self) -> None:
+        """0.84 is below 0.85 threshold — must return 'yellow', not 'green'."""
+        assert color_for_score(0.84, higher_is_better=True) == "yellow"
+
+    def test_score_085_is_green(self) -> None:
+        """0.85 is exactly at the threshold — must return 'green'."""
+        assert color_for_score(0.85, higher_is_better=True) == "green"
+
+    def test_score_086_is_green(self) -> None:
+        """0.86 is above the threshold — must return 'green'."""
+        assert color_for_score(0.86, higher_is_better=True) == "green"
+
+    def test_score_060_is_yellow(self) -> None:
+        """0.60 is at amber lower boundary — must return 'yellow'."""
+        assert color_for_score(0.60, higher_is_better=True) == "yellow"
+
+    def test_score_059_is_red(self) -> None:
+        """0.59 is below amber — must return 'red'."""
+        assert color_for_score(0.59, higher_is_better=True) == "red"
diff --git a/tests/test_report_html.py b/tests/test_report_html.py
@@ -4508,3 +4508,82 @@ def test_degraded_gracefully_without_sparklines(self, tmp_path: Path) -> None:
         assert "score-card" in content
         # No SVG polyline (sparklines not provided)
         assert "<polyline" not in content
+
+
+# ---------------------------------------------------------------------------
+# Task 4: Cross-check parametrized tests for CLI/HTML threshold parity (#300)
+# ---------------------------------------------------------------------------
+
+
+class TestCliHtmlColorBoundaryParity:
+    """CLI color_for_score() and html_color_for_score() must return identical
+    colors at all score boundary values — ensuring no green/yellow inconsistency
+    in the 0.80-0.85 range.
+
+    These tests are the regression guard for ticket #300.
+    """
+
+    # (score, expected_color) pairs covering every boundary region
+    SCORE_CASES = [
+        (0.00, "red"),
+        (0.59, "red"),
+        (0.60, "yellow"),
+        (0.65, "yellow"),
+        (0.80, "yellow"),  # the old hard-coded CLI green cutoff — previously green on CLI
+        (0.82, "yellow"),  # previously green on CLI — the critical bug value
+        (0.84, "yellow"),  # previously green on CLI — the critical bug value
+        (0.85, "green"),  # at the shared threshold boundary
+        (0.86, "green"),
+        (1.00, "green"),
+    ]
+
+    @pytest.mark.parametrize("score,expected_color", SCORE_CASES)
+    def test_cli_color_matches_expected(self, score: float, expected_color: str) -> None:
+        """CLI color_for_score must return expected color at each boundary value."""
+        from raki.report.cli_summary import color_for_score
+
+        assert color_for_score(score, higher_is_better=True) == expected_color, (
+            f"CLI color_for_score({score}) == {color_for_score(score, True)!r}, "
+            f"expected {expected_color!r}"
+        )
+
+    @pytest.mark.parametrize("score,expected_color", SCORE_CASES)
+    def test_html_color_matches_expected(self, score: float, expected_color: str) -> None:
+        """HTML html_color_for_score must return expected color at each boundary value."""
+        from raki.report.html_report import html_color_for_score
+
+        assert html_color_for_score(score, higher_is_better=True) == expected_color, (
+            f"HTML html_color_for_score({score}) == "
+            f"{html_color_for_score(score, True)!r}, expected {expected_color!r}"
+        )
+
+    @pytest.mark.parametrize("score,_expected_color", SCORE_CASES)
+    def test_cli_and_html_return_same_color(self, score: float, _expected_color: str) -> None:
+        """CLI and HTML functions must agree on color for every boundary score value."""
+        from raki.report.cli_summary import color_for_score
+        from raki.report.html_report import html_color_for_score
+
+        cli_color = color_for_score(score, higher_is_better=True)
+        html_color = html_color_for_score(score, higher_is_better=True)
+        assert cli_color == html_color, (
+            f"Color mismatch at score={score}: CLI={cli_color!r}, HTML={html_color!r}"
+        )
+
+    def test_both_use_same_green_threshold_constant(self) -> None:
+        """ZONE_THRESHOLDS['score']['green'], the shared green cutoff, must be 0.85."""
+        from raki.report._thresholds import ZONE_THRESHOLDS
+
+        green_threshold = ZONE_THRESHOLDS["score"]["green"]
+        assert green_threshold == 0.85, (
+            f"ZONE_THRESHOLDS['score']['green'] must be 0.85, got {green_threshold}"
+        )
+
+    def test_score_just_below_green_threshold_is_yellow(self) -> None:
+        """A score 0.001 below the green threshold must be yellow on both CLI and HTML."""
+        from raki.report._thresholds import ZONE_THRESHOLDS
+        from raki.report.cli_summary import color_for_score
+        from raki.report.html_report import html_color_for_score
+
+        just_below = ZONE_THRESHOLDS["score"]["green"] - 0.001
+        assert color_for_score(just_below, higher_is_better=True) == "yellow"
+        assert html_color_for_score(just_below, higher_is_better=True) == "yellow"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Align CLI and HTML score color thresholds to eliminate the green/yellow inconsistency at the 0.80–0.85 boundary. ``color_for_score()`` in ``cli_summary.py`` now reads from the shared ``ZONE_THRESHOLDS`` constant (green ≥ 0.85) instead of a hard-coded 0.80 cutoff, matching the HTML report's coloring exactly.