From ae28dacea0f4acabca29935083d11aec868c2b57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20=22decko=22=20de=20Brito?= Date: Tue, 26 May 2026 14:36:38 -0300 Subject: [PATCH 1/2] feat(#320): match history by manifest name field instead of filename for sparkline trends - Add _effective_manifest_name() helper to history.py that prefers EvalManifest.name over the filename when non-empty, falling back to the filename for backward compatibility - Update append_history_entry() to accept manifest_name parameter and store the effective manifest identity (logical name or filename) - Update find_last_matching_entry() to accept str | None and short-circuit on None - Update raki run CLI to compute effective_manifest_id from manifest.name and pass it to load_seen_session_ids, find_last_matching_entry, build_sparkline_data, and append_history_entry - Add 19 tests covering all new behavior - Add towncrier fragment changes/320.feature --- changes/320.feature | 1 + src/raki/cli.py | 16 +- src/raki/report/history.py | 47 +++- tests/test_320_manifest_name_history.py | 354 ++++++++++++++++++++++++ 4 files changed, 410 insertions(+), 8 deletions(-) create mode 100644 changes/320.feature create mode 100644 tests/test_320_manifest_name_history.py diff --git a/changes/320.feature b/changes/320.feature new file mode 100644 index 0000000..352d4ca --- /dev/null +++ b/changes/320.feature @@ -0,0 +1 @@ +History entries now use the manifest ``name:`` field as the match key for sparkline trends and incremental filtering, instead of the manifest filename. Projects that set ``name: my-project`` in their manifest YAML get a stable, rename-proof identity across runs. Projects without a ``name:`` field continue to use the filename (backward-compatible). diff --git a/src/raki/cli.py b/src/raki/cli.py index c868801..6996d3d 100644 --- a/src/raki/cli.py +++ b/src/raki/cli.py @@ -338,6 +338,13 @@ def run( out.print(f"[red]Error loading manifest: {redact_sensitive(str(exc))}[/red]") raise SystemExit(2) from exc + # Compute the effective manifest identity for history filtering. + # Prefer the logical ``name:`` field from the manifest YAML (stable across + # file renames) and fall back to the filename when name is empty. + from raki.report.history import _effective_manifest_name as _eff_mfn + + effective_manifest_id: str | None = _eff_mfn(manifest.name, manifest_file) + # Validate manifest judge.provider against allowed providers early, # before it reaches MetricConfig where it would raise a raw Pydantic error. if manifest.judge is not None and manifest.judge.provider is not None: @@ -427,7 +434,7 @@ def run( try: from raki.report.history import load_seen_session_ids - seen_ids = load_seen_session_ids(_resolved_history_path, manifest=manifest_file.name) + seen_ids = load_seen_session_ids(_resolved_history_path, manifest=effective_manifest_id) if not incremental and not rerun_all and not quiet and seen_ids: out.print( f"[yellow]Warning: {len(seen_ids)} session(s) were already evaluated in a " @@ -547,14 +554,14 @@ def run( existing_entries = load_history(_resolved_history_path) prior = find_last_matching_entry( existing_entries, - manifest_file.name, + effective_manifest_id, len(dataset.samples), ) if prior is not None: time_ago = format_time_ago(prior.timestamp) out.print( f"[yellow]Warning: This exact evaluation was already run {time_ago} " - f"(manifest={manifest_file.name}, sessions={len(dataset.samples)}). " + f"(manifest={effective_manifest_id}, sessions={len(dataset.samples)}). " f"Use --force to run again.[/yellow]" ) return @@ -629,7 +636,7 @@ def run( } run_sparklines = build_sparkline_data( history_entries, - manifest=manifest_file.name, + manifest=effective_manifest_id, metric_polarity=_polarity, ) except Exception: @@ -704,6 +711,7 @@ def run( history_path, sessions_count=len(dataset.samples), manifest_file=manifest_file, + manifest_name=manifest.name, ) except Exception as exc: out.print(f"[yellow]Warning: Failed to write history log: {exc}[/yellow]") diff --git a/src/raki/report/history.py b/src/raki/report/history.py index ce228d1..8a31266 100644 --- a/src/raki/report/history.py +++ b/src/raki/report/history.py @@ -14,6 +14,35 @@ from raki.model.report import EvalReport +def _effective_manifest_name( + manifest_name: str | None, + manifest_file: Path | None, +) -> str | None: + """Return the logical manifest identifier to store in a history entry. + + Priority: + 1. *manifest_name* — the ``name:`` field from the manifest YAML — when it + is a non-empty string. This gives projects a stable, rename-proof + identity that does not change if the manifest file is renamed. + 2. *manifest_file*.name (the filename) when *manifest_name* is absent or + empty — preserves backward-compatibility for projects that have not yet + set ``name:`` in their manifest. + 3. ``None`` when both arguments are absent/empty. + + Args: + manifest_name: The ``EvalManifest.name`` value, or ``None``. + manifest_file: The ``Path`` of the manifest file, or ``None``. + + Returns: + A non-empty string, or ``None`` when no identifier is available. + """ + if manifest_name: + return manifest_name + if manifest_file is not None: + return manifest_file.name + return None + + def _git_sha() -> str | None: """Return the short git SHA of HEAD, or None if not in a git repo.""" try: @@ -62,6 +91,7 @@ def append_history_entry( *, sessions_count: int, manifest_file: Path | None = None, + manifest_name: str | None = None, ) -> None: """Append a single ``HistoryEntry`` line to the JSONL history file. @@ -71,7 +101,12 @@ def append_history_entry( directories) on first call. Subsequent calls append without overwriting existing entries. sessions_count: Number of sessions that were evaluated in this run. - manifest_file: Path to the manifest file used for this run (basename stored). + manifest_file: Path to the manifest file used for this run. + manifest_name: Logical project name from ``EvalManifest.name``. When + non-empty, this value is stored in the ``manifest`` field instead + of the filename, giving the project a stable, rename-proof + identity across runs. Falls back to ``manifest_file.name`` when + absent or empty. Raises: ValueError: If ``history_path`` is a symlink (security guard). @@ -95,7 +130,7 @@ def append_history_entry( timestamp=report.timestamp, sessions_count=sessions_count, metrics=metrics_dict, - manifest=manifest_file.name if manifest_file is not None else None, + manifest=_effective_manifest_name(manifest_name, manifest_file), config_hash=_config_hash(report.config), git_sha=_git_sha(), warning_count=len(report.warnings), @@ -170,7 +205,7 @@ def import_history_entry( def find_last_matching_entry( entries: list[HistoryEntry], - manifest: str, + manifest: str | None, sessions_count: int, ) -> HistoryEntry | None: """Return the most recent history entry matching both *manifest* and *sessions_count*. @@ -180,13 +215,17 @@ def find_last_matching_entry( Args: entries: Existing history entries to search (any order). - manifest: Manifest basename to match against ``entry.manifest``. + manifest: Manifest identifier (logical project name or filename) to match + against ``entry.manifest``. When ``None``, no entries can match and + ``None`` is returned immediately. sessions_count: Session count to match against ``entry.sessions_count``. Returns: The entry with the highest ``timestamp`` that matches both criteria, or ``None`` when no entry matches. """ + if manifest is None: + return None matching = [ entry for entry in entries diff --git a/tests/test_320_manifest_name_history.py b/tests/test_320_manifest_name_history.py new file mode 100644 index 0000000..f0ad503 --- /dev/null +++ b/tests/test_320_manifest_name_history.py @@ -0,0 +1,354 @@ +"""Tests for ticket #320 — match history by manifest name field instead of filename. + +The ``manifest`` field stored in ``HistoryEntry`` should use the logical project +name from ``EvalManifest.name`` (the ``name:`` key in the YAML) when it is +non-empty, falling back to the manifest filename when ``name`` is absent or empty. + +This ensures that: +- Projects with a ``name:`` field get a stable, rename-proof identity. +- Projects without a ``name:`` field retain filename-based matching (backward compat). +- Sparkline / trend filtering and incremental-session filtering all use the same key. +""" + +from __future__ import annotations + +import json +from datetime import datetime, timezone +from pathlib import Path + +import pytest + +from conftest import make_history_entry +from raki.report.history import ( + HistoryEntry, + _effective_manifest_name, + append_history_entry, + find_last_matching_entry, + load_history, + load_seen_session_ids, +) +from raki.report.sparkline import build_sparkline_data +from raki.report.trends import compute_all_trends + + +# --------------------------------------------------------------------------- +# Helper +# --------------------------------------------------------------------------- + + +def _make_report(run_id: str = "eval-320", scores: dict | None = None): + from raki.model.report import EvalReport + + return EvalReport( + run_id=run_id, + timestamp=datetime(2026, 5, 1, 12, 0, 0, tzinfo=timezone.utc), + aggregate_scores=scores or {"first_pass_success_rate": 0.80}, + ) + + +# --------------------------------------------------------------------------- +# _effective_manifest_name helper +# --------------------------------------------------------------------------- + + +class TestEffectiveManifestName: + """Tests for the _effective_manifest_name() helper function.""" + + def test_returns_manifest_name_when_non_empty(self, tmp_path: Path) -> None: + """Non-empty manifest_name takes priority over the filename.""" + manifest_file = tmp_path / "raki.yaml" + manifest_file.touch() + result = _effective_manifest_name("my-project", manifest_file) + assert result == "my-project" + + def test_falls_back_to_filename_when_manifest_name_empty(self, tmp_path: Path) -> None: + """Empty string manifest_name falls back to manifest_file.name.""" + manifest_file = tmp_path / "raki.yaml" + manifest_file.touch() + result = _effective_manifest_name("", manifest_file) + assert result == "raki.yaml" + + def test_falls_back_to_filename_when_manifest_name_none(self, tmp_path: Path) -> None: + """None manifest_name falls back to manifest_file.name.""" + manifest_file = tmp_path / "raki.yaml" + manifest_file.touch() + result = _effective_manifest_name(None, manifest_file) + assert result == "raki.yaml" + + def test_returns_none_when_both_none(self) -> None: + """Returns None when both manifest_name and manifest_file are None.""" + result = _effective_manifest_name(None, None) + assert result is None + + def test_returns_manifest_name_when_file_is_none(self) -> None: + """Non-empty manifest_name is returned even when manifest_file is None.""" + result = _effective_manifest_name("my-project", None) + assert result == "my-project" + + +# --------------------------------------------------------------------------- +# append_history_entry — manifest_name parameter +# --------------------------------------------------------------------------- + + +class TestAppendHistoryEntryManifestName: + """Tests that append_history_entry stores the logical manifest name.""" + + def test_stores_logical_name_when_manifest_name_provided(self, tmp_path: Path) -> None: + """When manifest_name is non-empty, it must be stored in entry.manifest.""" + history_path = tmp_path / "history.jsonl" + report = _make_report() + manifest_file = tmp_path / "raki.yaml" + manifest_file.touch() + append_history_entry( + report, + history_path, + sessions_count=1, + manifest_file=manifest_file, + manifest_name="my-project", + ) + parsed = json.loads(history_path.read_text(encoding="utf-8").strip()) + assert parsed["manifest"] == "my-project" + + def test_falls_back_to_filename_when_manifest_name_empty(self, tmp_path: Path) -> None: + """When manifest_name is empty, entry.manifest must contain the filename.""" + history_path = tmp_path / "history.jsonl" + report = _make_report() + manifest_file = tmp_path / "raki.yaml" + manifest_file.touch() + append_history_entry( + report, + history_path, + sessions_count=1, + manifest_file=manifest_file, + manifest_name="", + ) + parsed = json.loads(history_path.read_text(encoding="utf-8").strip()) + assert parsed["manifest"] == "raki.yaml" + + def test_falls_back_to_filename_when_manifest_name_omitted(self, tmp_path: Path) -> None: + """Existing callers that omit manifest_name still get filename-based storage.""" + history_path = tmp_path / "history.jsonl" + report = _make_report() + manifest_file = tmp_path / "raki.yaml" + manifest_file.touch() + append_history_entry( + report, + history_path, + sessions_count=1, + manifest_file=manifest_file, + # manifest_name not passed → backward-compat + ) + parsed = json.loads(history_path.read_text(encoding="utf-8").strip()) + assert parsed["manifest"] == "raki.yaml" + + def test_manifest_none_when_no_file_and_no_name(self, tmp_path: Path) -> None: + """Without manifest_file or manifest_name, entry.manifest is null.""" + history_path = tmp_path / "history.jsonl" + report = _make_report() + append_history_entry(report, history_path, sessions_count=1) + parsed = json.loads(history_path.read_text(encoding="utf-8").strip()) + assert parsed["manifest"] is None + + def test_round_trip_logical_name(self, tmp_path: Path) -> None: + """Logical name written then loaded must match the stored value.""" + history_path = tmp_path / "history.jsonl" + report = _make_report(run_id="rt-320") + manifest_file = tmp_path / "raki.yaml" + manifest_file.touch() + append_history_entry( + report, + history_path, + sessions_count=5, + manifest_file=manifest_file, + manifest_name="soda-gate", + ) + entries = load_history(history_path) + assert len(entries) == 1 + assert entries[0].manifest == "soda-gate" + + +# --------------------------------------------------------------------------- +# build_sparkline_data — filters by logical name +# --------------------------------------------------------------------------- + + +class TestBuildSparklineDataManifestName: + """Tests that build_sparkline_data filters correctly by logical manifest name.""" + + def test_filters_entries_by_logical_name(self) -> None: + """Entries with the logical project name must be selected, filename entries excluded.""" + entries = [ + make_history_entry( + run_id="r1", + manifest="my-project", + metrics={"first_pass_success_rate": 0.80}, + timestamp=datetime(2026, 1, 1, tzinfo=timezone.utc), + ), + make_history_entry( + run_id="r2", + manifest="raki.yaml", # old filename-based entry + metrics={"first_pass_success_rate": 0.60}, + timestamp=datetime(2026, 1, 2, tzinfo=timezone.utc), + ), + ] + result = build_sparkline_data(entries, manifest="my-project") + sd = result.get("first_pass_success_rate") + assert sd is not None + assert len(sd.values) == 1 + assert sd.values[0] == pytest.approx(0.80) + + def test_excludes_filename_entries_when_filtering_by_name(self) -> None: + """When filtering by logical name, filename-only entries are excluded.""" + entries = [ + make_history_entry( + run_id="r1", + manifest="raki.yaml", + metrics={"first_pass_success_rate": 0.60}, + timestamp=datetime(2026, 1, 1, tzinfo=timezone.utc), + ), + ] + result = build_sparkline_data(entries, manifest="my-project") + assert result == {} + + def test_no_filter_includes_all_entries(self) -> None: + """No manifest filter returns data from all entries.""" + entries = [ + make_history_entry( + run_id="r1", + manifest="my-project", + metrics={"first_pass_success_rate": 0.80}, + timestamp=datetime(2026, 1, 1, tzinfo=timezone.utc), + ), + make_history_entry( + run_id="r2", + manifest="raki.yaml", + metrics={"first_pass_success_rate": 0.60}, + timestamp=datetime(2026, 1, 2, tzinfo=timezone.utc), + ), + ] + result = build_sparkline_data(entries) + sd = result.get("first_pass_success_rate") + assert sd is not None + assert len(sd.values) == 2 + + +# --------------------------------------------------------------------------- +# compute_all_trends — filters by logical name +# --------------------------------------------------------------------------- + + +class TestComputeAllTrendsManifestName: + """Tests that compute_all_trends manifest_filter works with logical name.""" + + def test_trend_filter_by_logical_name(self) -> None: + """compute_all_trends with manifest_filter=logical_name selects matching entries.""" + entries = [ + make_history_entry( + run_id="r1", + manifest="soda-gate", + metrics={"rework_cycles": 1.5}, + timestamp=datetime(2026, 1, 1, tzinfo=timezone.utc), + ), + make_history_entry( + run_id="r2", + manifest="other.yaml", + metrics={"rework_cycles": 3.0}, + timestamp=datetime(2026, 1, 2, tzinfo=timezone.utc), + ), + ] + trends = compute_all_trends(entries, manifest_filter="soda-gate") + rework = next((t for t in trends if t.metric_name == "rework_cycles"), None) + assert rework is not None + assert len(rework.values) == 1 + assert rework.values[0][1] == pytest.approx(1.5) + + def test_trend_filter_excludes_other_names(self) -> None: + """Entries with a different manifest value must be excluded.""" + entries = [ + make_history_entry( + run_id="r1", + manifest="raki.yaml", + metrics={"rework_cycles": 3.0}, + timestamp=datetime(2026, 1, 1, tzinfo=timezone.utc), + ), + ] + trends = compute_all_trends(entries, manifest_filter="soda-gate") + assert trends == [] + + +# --------------------------------------------------------------------------- +# load_seen_session_ids — filters by logical name +# --------------------------------------------------------------------------- + + +class TestLoadSeenSessionIdsManifestName: + """Tests that load_seen_session_ids manifest filter works with logical name.""" + + def test_returns_ids_for_logical_name(self, tmp_path: Path) -> None: + """Session IDs stored under logical name are returned when filter matches.""" + history_path = tmp_path / "history.jsonl" + entry = HistoryEntry( + run_id="r1", + timestamp=datetime(2026, 1, 1, tzinfo=timezone.utc), + sessions_count=2, + metrics={}, + manifest="my-project", + session_ids=["sess-a", "sess-b"], + ) + line = json.dumps(entry.model_dump(mode="json"), default=str) + history_path.write_text(line + "\n") + + seen = load_seen_session_ids(history_path, manifest="my-project") + assert seen == {"sess-a", "sess-b"} + + def test_excludes_other_manifest_ids(self, tmp_path: Path) -> None: + """Session IDs from entries with different manifest must not be returned.""" + history_path = tmp_path / "history.jsonl" + entry = HistoryEntry( + run_id="r1", + timestamp=datetime(2026, 1, 1, tzinfo=timezone.utc), + sessions_count=2, + metrics={}, + manifest="raki.yaml", + session_ids=["sess-x"], + ) + line = json.dumps(entry.model_dump(mode="json"), default=str) + history_path.write_text(line + "\n") + + seen = load_seen_session_ids(history_path, manifest="my-project") + assert seen == set() + + +# --------------------------------------------------------------------------- +# find_last_matching_entry — matches by logical name +# --------------------------------------------------------------------------- + + +class TestFindLastMatchingEntryManifestName: + """Tests that find_last_matching_entry matches logical project names.""" + + def test_matches_by_logical_name(self) -> None: + """Entry with manifest=logical_name is returned when queried by logical_name.""" + entry = HistoryEntry( + run_id="r1", + timestamp=datetime(2026, 4, 1, tzinfo=timezone.utc), + sessions_count=5, + metrics={}, + manifest="soda-gate", + ) + result = find_last_matching_entry([entry], "soda-gate", 5) + assert result is not None + assert result.run_id == "r1" + + def test_does_not_match_different_name(self) -> None: + """Entry stored with filename does not match a logical name query.""" + entry = HistoryEntry( + run_id="r1", + timestamp=datetime(2026, 4, 1, tzinfo=timezone.utc), + sessions_count=5, + metrics={}, + manifest="raki.yaml", + ) + result = find_last_matching_entry([entry], "soda-gate", 5) + assert result is None From 1a8c82952e413260844c98dc38e05925f7494cd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20=22decko=22=20de=20Brito?= Date: Tue, 26 May 2026 14:44:54 -0300 Subject: [PATCH 2/2] refactor(#320): drop underscore from effective_manifest_name, remove import alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review findings: - Rename _effective_manifest_name → effective_manifest_name (now part of public API) - Replace cryptic _eff_mfn alias in cli.py with the full descriptive name --- src/raki/cli.py | 4 ++-- src/raki/report/history.py | 4 ++-- tests/test_320_manifest_name_history.py | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/raki/cli.py b/src/raki/cli.py index 6996d3d..4321ce9 100644 --- a/src/raki/cli.py +++ b/src/raki/cli.py @@ -341,9 +341,9 @@ def run( # Compute the effective manifest identity for history filtering. # Prefer the logical ``name:`` field from the manifest YAML (stable across # file renames) and fall back to the filename when name is empty. - from raki.report.history import _effective_manifest_name as _eff_mfn + from raki.report.history import effective_manifest_name - effective_manifest_id: str | None = _eff_mfn(manifest.name, manifest_file) + effective_manifest_id: str | None = effective_manifest_name(manifest.name, manifest_file) # Validate manifest judge.provider against allowed providers early, # before it reaches MetricConfig where it would raise a raw Pydantic error. diff --git a/src/raki/report/history.py b/src/raki/report/history.py index 8a31266..490cee5 100644 --- a/src/raki/report/history.py +++ b/src/raki/report/history.py @@ -14,7 +14,7 @@ from raki.model.report import EvalReport -def _effective_manifest_name( +def effective_manifest_name( manifest_name: str | None, manifest_file: Path | None, ) -> str | None: @@ -130,7 +130,7 @@ def append_history_entry( timestamp=report.timestamp, sessions_count=sessions_count, metrics=metrics_dict, - manifest=_effective_manifest_name(manifest_name, manifest_file), + manifest=effective_manifest_name(manifest_name, manifest_file), config_hash=_config_hash(report.config), git_sha=_git_sha(), warning_count=len(report.warnings), diff --git a/tests/test_320_manifest_name_history.py b/tests/test_320_manifest_name_history.py index f0ad503..20fcead 100644 --- a/tests/test_320_manifest_name_history.py +++ b/tests/test_320_manifest_name_history.py @@ -21,7 +21,7 @@ from conftest import make_history_entry from raki.report.history import ( HistoryEntry, - _effective_manifest_name, + effective_manifest_name, append_history_entry, find_last_matching_entry, load_history, @@ -47,42 +47,42 @@ def _make_report(run_id: str = "eval-320", scores: dict | None = None): # --------------------------------------------------------------------------- -# _effective_manifest_name helper +# effective_manifest_name helper # --------------------------------------------------------------------------- class TestEffectiveManifestName: - """Tests for the _effective_manifest_name() helper function.""" + """Tests for the effective_manifest_name() helper function.""" def test_returns_manifest_name_when_non_empty(self, tmp_path: Path) -> None: """Non-empty manifest_name takes priority over the filename.""" manifest_file = tmp_path / "raki.yaml" manifest_file.touch() - result = _effective_manifest_name("my-project", manifest_file) + result = effective_manifest_name("my-project", manifest_file) assert result == "my-project" def test_falls_back_to_filename_when_manifest_name_empty(self, tmp_path: Path) -> None: """Empty string manifest_name falls back to manifest_file.name.""" manifest_file = tmp_path / "raki.yaml" manifest_file.touch() - result = _effective_manifest_name("", manifest_file) + result = effective_manifest_name("", manifest_file) assert result == "raki.yaml" def test_falls_back_to_filename_when_manifest_name_none(self, tmp_path: Path) -> None: """None manifest_name falls back to manifest_file.name.""" manifest_file = tmp_path / "raki.yaml" manifest_file.touch() - result = _effective_manifest_name(None, manifest_file) + result = effective_manifest_name(None, manifest_file) assert result == "raki.yaml" def test_returns_none_when_both_none(self) -> None: """Returns None when both manifest_name and manifest_file are None.""" - result = _effective_manifest_name(None, None) + result = effective_manifest_name(None, None) assert result is None def test_returns_manifest_name_when_file_is_none(self) -> None: """Non-empty manifest_name is returned even when manifest_file is None.""" - result = _effective_manifest_name("my-project", None) + result = effective_manifest_name("my-project", None) assert result == "my-project"