Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/300.fix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Align CLI and HTML score color thresholds to eliminate the green/yellow inconsistency for scores in the 0.80–0.85 range. ``color_for_score()`` in ``cli_summary.py`` now reads from the shared ``ZONE_THRESHOLDS`` constant (green ≥ 0.85) instead of a hard-coded 0.80 cutoff, so it matches the HTML report's coloring exactly.
17 changes: 17 additions & 0 deletions src/raki/report/_thresholds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Shared zone threshold constants — single source of truth for all color-zone logic.

Both ``html_report.py`` and ``cli_summary.py`` import from this module so that
CLI and HTML color functions always apply identical thresholds.
"""

from __future__ import annotations

# Central zone threshold config — single source of truth for all color-zone logic.
# Maps a metric family name to the numeric cutoffs of its "green" and "amber" zones.
ZONE_THRESHOLDS: dict[str, dict[str, float]] = {
    # 0-1 score metrics (higher is better): green >= 0.85,
    # amber >= 0.60 and < 0.85, red < 0.60
    "score": {"green": 0.85, "amber": 0.60},
    # Cost (currency) metrics: green <= $5, amber $5-$15, red > $15
    # NOTE(review): exact handling of the $5 / $15 boundaries lives in the
    # consuming code, which is not visible here — confirm against the caller.
    "cost": {"green": 5.0, "amber": 15.0},
    # Rework cycles: green <= 0.3, amber 0.3-1.0, red > 1.0
    # NOTE(review): boundary inclusivity likewise depends on the consumer — confirm.
    "rework_cycles": {"green": 0.3, "amber": 1.0},
}
6 changes: 4 additions & 2 deletions src/raki/report/cli_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from raki.metrics.protocol import Metric
from raki.model.report import EvalReport
from raki.report._thresholds import ZONE_THRESHOLDS

if TYPE_CHECKING:
from raki.report.cohort import CohortSummary
Expand Down Expand Up @@ -76,15 +77,16 @@ def color_for_score(
) -> str:
"""Color-code a score value.

Thresholds sourced from ZONE_THRESHOLDS: green >= 0.85, amber 0.60-0.84, red < 0.60.
Skip color for non-ratio metrics (currency, count) where higher_is_better
is False -- those values are not on a 0-1 scale.
"""
if not higher_is_better and display_format in ("currency", "count", "duration"):
return "white"
if higher_is_better:
if score >= 0.8:
if score >= ZONE_THRESHOLDS["score"]["green"]:
return "green"
if score >= 0.6:
if score >= ZONE_THRESHOLDS["score"]["amber"]:
return "yellow"
return "red"
else:
Expand Down
11 changes: 1 addition & 10 deletions src/raki/report/html_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from raki.model.dataset import EvalSample
from raki.model.phases import PhaseResult
from raki.model.report import EvalReport, SampleResult
from raki.report._thresholds import ZONE_THRESHOLDS
from raki.report.cli_summary import (
EXPERIMENTAL_METRICS,
KNOWLEDGE_METRICS,
Expand All @@ -22,16 +23,6 @@
from raki.report.diff import DiffReport
from raki.report.sparkline import SparklineData

# Central zone threshold config — single source of truth for all color-zone logic.
# NOTE(review): the same name is also imported from raki.report._thresholds in this
# module's import block; this assignment would rebind it — confirm only one remains.
ZONE_THRESHOLDS: dict[str, dict[str, float]] = {
    # 0-1 score metrics (higher is better): green >= 0.85,
    # amber >= 0.60 and < 0.85, red < 0.60
    "score": {"green": 0.85, "amber": 0.60},
    # Cost (currency) metrics: green <= $5, amber $5-$15, red > $15
    "cost": {"green": 5.0, "amber": 15.0},
    # Rework cycles: green <= 0.3, amber 0.3-1.0, red > 1.0
    "rework_cycles": {"green": 0.3, "amber": 1.0},
}

# Metric metadata registry — maps raw metric names to display properties.
# This mirrors the class-level attributes from each Metric implementation
# so the HTML report can render display_name, format values, and pick colors
Expand Down
96 changes: 96 additions & 0 deletions tests/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -1745,3 +1745,99 @@ def test_no_indicators_without_sparklines(self) -> None:
# No sparklines → no delta indicators
assert "▲" not in output
assert "▼" not in output


# ---------------------------------------------------------------------------
# Task 1: ZONE_THRESHOLDS shared module (ticket #300)
# ---------------------------------------------------------------------------


class TestZoneThresholdsModule:
    """Shape checks for ``ZONE_THRESHOLDS`` as exported by ``raki.report._thresholds``."""

    def test_zone_thresholds_importable(self) -> None:
        """The shared module exposes ``ZONE_THRESHOLDS`` and it is a dict."""
        # Imported inside the test so an import failure is reported as this
        # test failing rather than as a collection error.
        from raki.report._thresholds import ZONE_THRESHOLDS as thresholds

        assert isinstance(thresholds, dict)

    def test_zone_thresholds_has_score_key(self) -> None:
        """The 'score' entry exists and carries both 'green' and 'amber' cutoffs."""
        from raki.report._thresholds import ZONE_THRESHOLDS as thresholds

        assert "score" in thresholds
        score_zone = thresholds["score"]
        for zone_name in ("green", "amber"):
            assert zone_name in score_zone

    def test_zone_thresholds_score_green_is_085(self) -> None:
        """The green cutoff for scores is 0.85 (not the old CLI value 0.80)."""
        from raki.report._thresholds import ZONE_THRESHOLDS as thresholds

        green_cutoff = thresholds["score"]["green"]
        assert green_cutoff == 0.85

    def test_zone_thresholds_has_cost_key(self) -> None:
        """A 'cost' entry is present."""
        from raki.report._thresholds import ZONE_THRESHOLDS as thresholds

        assert "cost" in thresholds

    def test_zone_thresholds_has_rework_cycles_key(self) -> None:
        """A 'rework_cycles' entry is present."""
        from raki.report._thresholds import ZONE_THRESHOLDS as thresholds

        assert "rework_cycles" in thresholds


# ---------------------------------------------------------------------------
# Task 2: html_report imports ZONE_THRESHOLDS from _thresholds (ticket #300)
# ---------------------------------------------------------------------------


class TestHtmlReportUsesSharedThresholds:
    """``html_report`` must re-export the shared ``ZONE_THRESHOLDS``, not a private copy."""

    def test_html_report_zone_thresholds_is_shared_object(self) -> None:
        """``html_report.ZONE_THRESHOLDS`` is the very object defined in ``_thresholds``."""
        from raki.report import html_report
        from raki.report._thresholds import ZONE_THRESHOLDS as shared_thresholds

        # Identity (not equality): an equal-valued duplicate dict must fail here.
        assert html_report.ZONE_THRESHOLDS is shared_thresholds

    def test_html_report_zone_thresholds_score_green_is_085(self) -> None:
        """The green score cutoff seen through ``html_report`` is 0.85."""
        from raki.report.html_report import ZONE_THRESHOLDS as html_thresholds

        score_zone = html_thresholds["score"]
        assert score_zone["green"] == 0.85


# ---------------------------------------------------------------------------
# Task 3: color_for_score() uses shared thresholds at 0.85 boundary (ticket #300)
# ---------------------------------------------------------------------------


class TestColorForScoreAlignedThresholds:
    """``color_for_score()`` must switch to green at 0.85 (shared threshold), not 0.80."""

    def test_score_082_is_yellow_not_green(self) -> None:
        """0.82 sits under the 0.85 green cutoff, so it is 'yellow'."""
        color = color_for_score(0.82, higher_is_better=True)
        assert color == "yellow"

    def test_score_084_is_yellow_not_green(self) -> None:
        """0.84 sits under the 0.85 green cutoff, so it is 'yellow'."""
        color = color_for_score(0.84, higher_is_better=True)
        assert color == "yellow"

    def test_score_085_is_green(self) -> None:
        """0.85 equals the green cutoff, so it is 'green'."""
        color = color_for_score(0.85, higher_is_better=True)
        assert color == "green"

    def test_score_086_is_green(self) -> None:
        """0.86 exceeds the green cutoff, so it is 'green'."""
        color = color_for_score(0.86, higher_is_better=True)
        assert color == "green"

    def test_score_060_is_yellow(self) -> None:
        """0.60 equals the amber lower bound, so it is 'yellow'."""
        color = color_for_score(0.60, higher_is_better=True)
        assert color == "yellow"

    def test_score_059_is_red(self) -> None:
        """0.59 falls under the amber lower bound, so it is 'red'."""
        color = color_for_score(0.59, higher_is_better=True)
        assert color == "red"
79 changes: 79 additions & 0 deletions tests/test_report_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -4508,3 +4508,82 @@ def test_degraded_gracefully_without_sparklines(self, tmp_path: Path) -> None:
assert "score-card" in content
# No SVG polyline (sparklines not provided)
assert "<polyline" not in content


# ---------------------------------------------------------------------------
# Task 4: Cross-check parametrized tests for CLI/HTML threshold parity (#300)
# ---------------------------------------------------------------------------


class TestCliHtmlColorBoundaryParity:
    """Regression guard for ticket #300: CLI and HTML score colors must agree.

    CLI ``color_for_score()`` and ``html_color_for_score()`` must return identical
    colors at all score boundary values, so that no score in the 0.80-0.85 range
    is green in one report and yellow in the other.
    """

    # (score, expected_color) pairs covering every boundary region
    SCORE_CASES = [
        (0.00, "red"),
        (0.59, "red"),
        (0.60, "yellow"),
        (0.65, "yellow"),
        (0.80, "yellow"),  # previously green on CLI — the critical bug value
        (0.82, "yellow"),  # previously green on CLI — the critical bug value
        (0.84, "yellow"),  # previously green on CLI — the critical bug value
        (0.85, "green"),  # at the shared threshold boundary
        (0.86, "green"),
        (1.00, "green"),
    ]

    @pytest.mark.parametrize("score,expected_color", SCORE_CASES)
    def test_cli_color_matches_expected(self, score: float, expected_color: str) -> None:
        """CLI color_for_score must return expected color at each boundary value."""
        from raki.report.cli_summary import color_for_score

        # Evaluate once so the failure message reports exactly the value that
        # was asserted, using the same keyword call convention.
        actual_color = color_for_score(score, higher_is_better=True)
        assert actual_color == expected_color, (
            f"CLI color_for_score({score}) == {actual_color!r}, "
            f"expected {expected_color!r}"
        )

    @pytest.mark.parametrize("score,expected_color", SCORE_CASES)
    def test_html_color_matches_expected(self, score: float, expected_color: str) -> None:
        """HTML html_color_for_score must return expected color at each boundary value."""
        from raki.report.html_report import html_color_for_score

        # Evaluate once; see test_cli_color_matches_expected for the rationale.
        actual_color = html_color_for_score(score, higher_is_better=True)
        assert actual_color == expected_color, (
            f"HTML html_color_for_score({score}) == "
            f"{actual_color!r}, expected {expected_color!r}"
        )

    @pytest.mark.parametrize("score,_expected_color", SCORE_CASES)
    def test_cli_and_html_return_same_color(self, score: float, _expected_color: str) -> None:
        """CLI and HTML functions must agree on color for every boundary score value."""
        from raki.report.cli_summary import color_for_score
        from raki.report.html_report import html_color_for_score

        # The expected color is deliberately ignored: only CLI/HTML agreement
        # is under test here.
        cli_color = color_for_score(score, higher_is_better=True)
        html_color = html_color_for_score(score, higher_is_better=True)
        assert cli_color == html_color, (
            f"Color mismatch at score={score}: CLI={cli_color!r}, HTML={html_color!r}"
        )

    def test_both_use_same_green_threshold_constant(self) -> None:
        """The shared green threshold is 0.85 and both functions turn green exactly there."""
        from raki.report._thresholds import ZONE_THRESHOLDS
        from raki.report.cli_summary import color_for_score
        from raki.report.html_report import html_color_for_score

        green_threshold = ZONE_THRESHOLDS["score"]["green"]
        assert green_threshold == 0.85, (
            f"ZONE_THRESHOLDS['score']['green'] must be 0.85, got {green_threshold}"
        )
        # Checking the constant alone says nothing about the functions; make
        # sure each one actually reports green at the threshold value itself.
        assert color_for_score(green_threshold, higher_is_better=True) == "green"
        assert html_color_for_score(green_threshold, higher_is_better=True) == "green"

    def test_score_just_below_green_threshold_is_yellow(self) -> None:
        """A score 0.001 below the green threshold must be yellow on both CLI and HTML."""
        from raki.report._thresholds import ZONE_THRESHOLDS
        from raki.report.cli_summary import color_for_score
        from raki.report.html_report import html_color_for_score

        just_below = ZONE_THRESHOLDS["score"]["green"] - 0.001
        assert color_for_score(just_below, higher_is_better=True) == "yellow"
        assert html_color_for_score(just_below, higher_is_better=True) == "yellow"
Loading