From ba4d3933bf412d1a985990b55f03ef2eaa5801d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20=22decko=22=20de=20Brito?= Date: Tue, 26 May 2026 19:48:53 -0300 Subject: [PATCH 1/7] feat(319): add 'superseded' to PhaseResult.status Literal Extends the PhaseResult status field to include 'superseded' as a valid value alongside 'completed', 'failed', and 'skipped'. This new status represents phases that were interrupted (e.g. by a timeout) and replaced by a newer generation attempt. Also updates conftest.make_sample() verify_status type annotation to include 'superseded' for completeness. --- src/raki/model/phases.py | 2 +- tests/conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/raki/model/phases.py b/src/raki/model/phases.py index d6f96bb..fe02c72 100644 --- a/src/raki/model/phases.py +++ b/src/raki/model/phases.py @@ -14,7 +14,7 @@ class ToolCall(BaseModel): class PhaseResult(BaseModel): name: str generation: int - status: Literal["completed", "failed", "skipped"] + status: Literal["completed", "failed", "skipped", "superseded"] cost_usd: float | None = None duration_ms: int | None = None tokens_in: int | None = None diff --git a/tests/conftest.py b/tests/conftest.py index 5221d45..9468947 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,7 +25,7 @@ def make_sample( patch_cycles: int = 0, cost: float = 10.0, verify_gen: int = 1, - verify_status: Literal["completed", "failed", "skipped"] = "completed", + verify_status: Literal["completed", "failed", "skipped", "superseded"] = "completed", findings: list[ReviewFinding] | None = None, duration_ms: int | None = None, tokens_in: int | None = None, From 192d7f69151d6dcfc3ea0b016409bd7eb19d2063 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20=22decko=22=20de=20Brito?= Date: Tue, 26 May 2026 19:49:59 -0300 Subject: [PATCH 2/7] test(319): add timeout-resume session fixture and conftest fixture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Creates tests/fixtures/sessions/timeout-resume/ to model a session where implement gen-1 timed out mid-execution (no phase_completed event written, no .json.1 output file saved) and was resumed at gen-2. The events.jsonl shows phase_started(implement, gen=1) immediately followed by phase_started(implement, gen=2) with no intervening completion — the canonical pattern that _synthesize_superseded_phases will detect. Also adds the timeout_resume_dir pytest fixture to conftest.py. --- tests/conftest.py | 12 ++++++++++++ .../sessions/timeout-resume/events.jsonl | 13 +++++++++++++ .../sessions/timeout-resume/implement.json | 1 + tests/fixtures/sessions/timeout-resume/meta.json | 16 ++++++++++++++++ tests/fixtures/sessions/timeout-resume/plan.json | 1 + .../fixtures/sessions/timeout-resume/review.json | 1 + .../fixtures/sessions/timeout-resume/submit.json | 1 + .../fixtures/sessions/timeout-resume/triage.json | 1 + .../fixtures/sessions/timeout-resume/verify.json | 1 + 9 files changed, 47 insertions(+) create mode 100644 tests/fixtures/sessions/timeout-resume/events.jsonl create mode 100644 tests/fixtures/sessions/timeout-resume/implement.json create mode 100644 tests/fixtures/sessions/timeout-resume/meta.json create mode 100644 tests/fixtures/sessions/timeout-resume/plan.json create mode 100644 tests/fixtures/sessions/timeout-resume/review.json create mode 100644 tests/fixtures/sessions/timeout-resume/submit.json create mode 100644 tests/fixtures/sessions/timeout-resume/triage.json create mode 100644 tests/fixtures/sessions/timeout-resume/verify.json diff --git a/tests/conftest.py b/tests/conftest.py index 9468947..88aaea4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -151,6 +151,18 @@ def soda_session_dir(fixtures_dir: Path) -> Path: return fixtures_dir / "soda-session" +@pytest.fixture +def timeout_resume_dir(sessions_dir: Path) -> Path: + """Return the path to the timeout-resume session fixture. 
+ + This fixture models a session where implement gen-1 timed out mid-execution + (no completion event or output file was recorded) and was automatically + resumed at gen-2. It is the canonical test case for the 'superseded' + phase status and the _synthesize_superseded_phases adapter logic. + """ + return sessions_dir / "timeout-resume" + + @pytest.fixture def manifest_with_session(tmp_path: Path, pass_simple_dir: Path) -> tuple[Path, Path]: """Create a tmp_path with a manifest and a copied pass-simple session. diff --git a/tests/fixtures/sessions/timeout-resume/events.jsonl b/tests/fixtures/sessions/timeout-resume/events.jsonl new file mode 100644 index 0000000..752dcfc --- /dev/null +++ b/tests/fixtures/sessions/timeout-resume/events.jsonl @@ -0,0 +1,13 @@ +{"timestamp":"2026-05-01T10:00:00Z","phase":"triage","kind":"phase_started","data":{"generation":1}} +{"timestamp":"2026-05-01T10:00:25Z","phase":"triage","kind":"phase_completed","data":{"cost":0.30,"duration_ms":25000}} +{"timestamp":"2026-05-01T10:00:25Z","phase":"plan","kind":"phase_started","data":{"generation":1}} +{"timestamp":"2026-05-01T10:01:25Z","phase":"plan","kind":"phase_completed","data":{"cost":0.80,"duration_ms":60000}} +{"timestamp":"2026-05-01T10:01:25Z","phase":"implement","kind":"phase_started","data":{"generation":1}} +{"timestamp":"2026-05-01T10:05:25Z","phase":"implement","kind":"phase_started","data":{"generation":2}} +{"timestamp":"2026-05-01T10:09:25Z","phase":"implement","kind":"phase_completed","data":{"cost":3.20,"duration_ms":240000}} +{"timestamp":"2026-05-01T10:09:25Z","phase":"verify","kind":"phase_started","data":{"generation":1}} +{"timestamp":"2026-05-01T10:10:55Z","phase":"verify","kind":"phase_completed","data":{"cost":1.10,"duration_ms":90000,"summary":"PASS"}} +{"timestamp":"2026-05-01T10:10:55Z","phase":"review","kind":"phase_started","data":{"generation":1}} 
+{"timestamp":"2026-05-01T10:12:45Z","phase":"review","kind":"phase_completed","data":{"cost":1.50,"duration_ms":110000}} +{"timestamp":"2026-05-01T10:12:45Z","phase":"submit","kind":"phase_started","data":{"generation":1}} +{"timestamp":"2026-05-01T10:13:15Z","phase":"submit","kind":"phase_completed","data":{"cost":0.60,"duration_ms":30000}} diff --git a/tests/fixtures/sessions/timeout-resume/implement.json b/tests/fixtures/sessions/timeout-resume/implement.json new file mode 100644 index 0000000..a3760c5 --- /dev/null +++ b/tests/fixtures/sessions/timeout-resume/implement.json @@ -0,0 +1 @@ +{"ticket_key": "timeout-01", "branch": "soda/timeout-01", "commits": [{"hash": "abc1234", "message": "feat: add retry logic for transient HTTP errors [#timeout-01]", "task_id": "T1"}], "files_changed": [{"path": "src/http_client.py", "action": "modified"}, {"path": "tests/test_http_client.py", "action": "created"}], "task_results": [{"task_id": "T1", "status": "completed"}, {"task_id": "T2", "status": "completed"}], "tests_passed": true, "test_output": "12 passed in 0.08s", "deviations": []} diff --git a/tests/fixtures/sessions/timeout-resume/meta.json b/tests/fixtures/sessions/timeout-resume/meta.json new file mode 100644 index 0000000..1ba86d9 --- /dev/null +++ b/tests/fixtures/sessions/timeout-resume/meta.json @@ -0,0 +1,16 @@ +{ + "ticket": "timeout-01", + "summary": "Fix: add retry logic for transient HTTP errors", + "branch": "soda/timeout-01", + "started_at": "2026-05-01T10:00:00Z", + "total_cost": 8.5, + "rework_cycles": 1, + "phases": { + "triage": {"status": "completed", "cost": 0.30, "duration_ms": 25000, "generation": 1}, + "plan": {"status": "completed", "cost": 0.80, "duration_ms": 60000, "generation": 1}, + "implement": {"status": "completed", "cost": 3.20, "duration_ms": 240000, "generation": 2}, + "verify": {"status": "completed", "cost": 1.10, "duration_ms": 90000, "generation": 1}, + "review": {"status": "completed", "cost": 1.50, "duration_ms": 110000, 
"generation": 1}, + "submit": {"status": "completed", "cost": 0.60, "duration_ms": 30000, "generation": 1} + } +} diff --git a/tests/fixtures/sessions/timeout-resume/plan.json b/tests/fixtures/sessions/timeout-resume/plan.json new file mode 100644 index 0000000..56a991e --- /dev/null +++ b/tests/fixtures/sessions/timeout-resume/plan.json @@ -0,0 +1 @@ +{"ticket_key": "timeout-01", "approach": "implement retry logic using tenacity library", "tasks": [{"task_id": "T1", "description": "Add retry decorator to HTTP client", "files": ["src/http_client.py"]}, {"task_id": "T2", "description": "Add unit tests", "files": ["tests/test_http_client.py"]}]} diff --git a/tests/fixtures/sessions/timeout-resume/review.json b/tests/fixtures/sessions/timeout-resume/review.json new file mode 100644 index 0000000..aefd640 --- /dev/null +++ b/tests/fixtures/sessions/timeout-resume/review.json @@ -0,0 +1 @@ +{"ticket_key": "timeout-01", "verdict": "approve", "findings": []} diff --git a/tests/fixtures/sessions/timeout-resume/submit.json b/tests/fixtures/sessions/timeout-resume/submit.json new file mode 100644 index 0000000..3796dd5 --- /dev/null +++ b/tests/fixtures/sessions/timeout-resume/submit.json @@ -0,0 +1 @@ +{"ticket_key": "timeout-01", "branch": "soda/timeout-01", "target": "main", "pr_url": "https://github.com/example/repo/pull/42", "title": "feat: add retry logic for transient HTTP errors"} diff --git a/tests/fixtures/sessions/timeout-resume/triage.json b/tests/fixtures/sessions/timeout-resume/triage.json new file mode 100644 index 0000000..1ef0be8 --- /dev/null +++ b/tests/fixtures/sessions/timeout-resume/triage.json @@ -0,0 +1 @@ +{"ticket_key": "timeout-01", "approach": "add retry decorator with exponential backoff", "complexity": "medium", "code_area": "network/http_client.py", "files": ["src/http_client.py", "tests/test_http_client.py"], "risks": ["regression in retry timing"]} diff --git a/tests/fixtures/sessions/timeout-resume/verify.json 
b/tests/fixtures/sessions/timeout-resume/verify.json new file mode 100644 index 0000000..62560e8 --- /dev/null +++ b/tests/fixtures/sessions/timeout-resume/verify.json @@ -0,0 +1 @@ +{"ticket_key": "timeout-01", "verdict": "PASS", "command_results": [{"command": "python -m pytest tests/", "exit_code": 0, "passed": true}], "criteria_results": [{"criterion": "retry on transient HTTP 503", "passed": true, "evidence": "test_retry_on_503 passes"}]} From 669268952a515fff993e08c3d73babbb3f6e62fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20=22decko=22=20de=20Brito?= Date: Tue, 26 May 2026 19:57:14 -0300 Subject: [PATCH 3/7] feat(319): implement _synthesize_superseded_phases in SessionSchemaAdapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds SessionSchemaAdapter._synthesize_superseded_phases() which detects phase-start events that were never followed by a completion or failure before the same phase restarted at a higher generation (timeout/resume pattern). Algorithm: for each phase name, scan consecutive phase_started pairs; if no phase_completed or phase_failed exists between them, the earlier generation was superseded. The synthesized PhaseResult carries status='superseded' and empty output (no data was captured for the interrupted run). Guards: - Does not synthesize when the lower generation IS present on disk (normal rework cycle: both gen-1 and gen-2 files exist). - Does not synthesize when an intervening phase_completed or phase_failed event was recorded (soda-session: gen-1 completed with FAIL before gen-2 was launched). - Does not synthesize for custom event kinds like review_merged (rework-cycle fixture: review ended via review_merged, never restarted — not superseded). 
Refs: #319 --- src/raki/adapters/session_schema.py | 75 +++++++++++++++++++++++++++++ tests/test_adapters.py | 69 +++++++++++++++++++++++++- 2 files changed, 142 insertions(+), 2 deletions(-) diff --git a/src/raki/adapters/session_schema.py b/src/raki/adapters/session_schema.py index 93809b2..89e7660 100644 --- a/src/raki/adapters/session_schema.py +++ b/src/raki/adapters/session_schema.py @@ -172,8 +172,83 @@ def _load_phases( phases: list[PhaseResult] = [] for phase_name in PHASE_NAMES: phases.extend(self._load_phase_files(source, phase_name, meta_raw, events)) + phases.extend(self._synthesize_superseded_phases(phases, events)) return phases + @staticmethod + def _synthesize_superseded_phases( + phases: list[PhaseResult], + events: list[SessionEvent], + ) -> list[PhaseResult]: + """Detect and synthesize superseded PhaseResult entries from the event log. + + A phase at generation G is considered 'superseded' when ALL of these + hold: + + 1. A ``phase_started`` event exists for ``(phase=P, gen=G)``. + 2. A subsequent ``phase_started`` event exists for ``(phase=P, gen=G\')``, + i.e. the same phase restarted at a higher generation. + 3. No ``phase_completed`` or ``phase_failed`` event exists for phase P + *between* the two consecutive ``phase_started`` events. This + distinguishes a clean handoff (gen-1 completed, gen-2 is a rework + cycle) from an interrupted handoff (gen-1 timed out, gen-2 resumed). + 4. No loaded :class:`PhaseResult` already exists for ``(phase=P, gen=G)`` + — avoids duplicating a phase whose output file was recorded on disk. + + The synthesized phases carry ``status="superseded"``, an empty + ``output``, and no cost/duration/token metadata (none was captured). + """ + # Index already-loaded (phase_name, generation) pairs to avoid duplication. + loaded_set: set[tuple[str, int]] = {(phase.name, phase.generation) for phase in phases} + + synthesized: list[PhaseResult] = [] + + # Collect distinct phase names that appear in events (preserving order). 
+ seen_phase_names: set[str] = set() + ordered_phase_names: list[str] = [] + for event in events: + if event.phase is not None and event.phase not in seen_phase_names: + seen_phase_names.add(event.phase) + ordered_phase_names.append(event.phase) + + for phase_name in ordered_phase_names: + # Extract only the events for this phase, in event-stream order. + phase_events = [event for event in events if event.phase == phase_name] + + # Locate all phase_started events together with their slice index + # so we can inspect what lies between consecutive starts. + started_indices: list[tuple[int, int]] = [ + (idx, event.data.get("generation", 1)) + for idx, event in enumerate(phase_events) + if event.kind == "phase_started" + and isinstance(event.data.get("generation", 1), int) + ] + + # Examine consecutive pairs of starts. If nothing completes or + # fails between the first and second start, the first generation + # was interrupted (superseded). + for pair_idx in range(len(started_indices) - 1): + first_event_idx, first_gen = started_indices[pair_idx] + second_event_idx, _ = started_indices[pair_idx + 1] + + # Scan the events between the two starts. 
+ has_completion = any( + phase_events[between_idx].kind in ("phase_completed", "phase_failed") + for between_idx in range(first_event_idx + 1, second_event_idx) + ) + + if not has_completion and (phase_name, first_gen) not in loaded_set: + synthesized.append( + PhaseResult( + name=phase_name, + generation=first_gen, + status="superseded", + output="", + ) + ) + + return synthesized + def _load_phase_files( self, source: Path, diff --git a/tests/test_adapters.py b/tests/test_adapters.py index 3a7bfac..fe25634 100644 --- a/tests/test_adapters.py +++ b/tests/test_adapters.py @@ -1279,8 +1279,8 @@ def test_load_directory_with_valid_adapter_name(self, sessions_dir): registry.register(SessionSchemaAdapter()) loader = DatasetLoader(registry) dataset = loader.load_directory(sessions_dir, adapter_name="session-schema") - # pass-simple and rework-cycle are valid session dirs under sessions/ - assert len(dataset.samples) == 2 + # pass-simple, rework-cycle, and timeout-resume are valid session dirs + assert len(dataset.samples) == 3 def test_load_directory_with_invalid_adapter_name(self, sessions_dir): registry = AdapterRegistry() @@ -3565,3 +3565,68 @@ def test_session_meta_adapter_format_defaults_to_empty_string(): rework_cycles=0, ) assert meta.adapter_format == "" + + +# --- Ticket #319: superseded phase synthesis --- + + +TIMEOUT_RESUME_FIXTURE = FIXTURES / "timeout-resume" + + +def test_superseded_phase_synthesized_for_timeout_resume(timeout_resume_dir: Path): + """Adapter synthesizes a superseded PhaseResult for the timed-out gen-1 implement.""" + adapter = SessionSchemaAdapter() + sample = adapter.load(timeout_resume_dir) + superseded = [phase for phase in sample.phases if phase.status == "superseded"] + assert len(superseded) == 1 + assert superseded[0].name == "implement" + assert superseded[0].generation == 1 + + +def test_superseded_phase_does_not_duplicate_existing_phase(timeout_resume_dir: Path): + """Non-missing generations (gen-2 implement file exists) are NOT 
synthesized as superseded.""" + adapter = SessionSchemaAdapter() + sample = adapter.load(timeout_resume_dir) + impl_phases = [phase for phase in sample.phases if phase.name == "implement"] + # Should have exactly 2: implement(gen=1, superseded) + implement(gen=2, completed) + assert len(impl_phases) == 2 + statuses = {phase.generation: phase.status for phase in impl_phases} + assert statuses[1] == "superseded" + assert statuses[2] == "completed" + + +def test_no_spurious_superseded_on_normal_rework(rework_cycle_dir: Path): + """Normal rework cycles (both gen files present) must NOT produce superseded phases.""" + adapter = SessionSchemaAdapter() + sample = adapter.load(rework_cycle_dir) + superseded = [phase for phase in sample.phases if phase.status == "superseded"] + assert superseded == [], f"Expected no superseded phases, got {superseded}" + + +def test_no_spurious_superseded_on_soda_session(soda_session_dir: Path): + """SODA rework session (gen-1 completed, gen-2 replaces it) must NOT produce superseded.""" + adapter = SessionSchemaAdapter() + sample = adapter.load(soda_session_dir) + superseded = [phase for phase in sample.phases if phase.status == "superseded"] + assert superseded == [], f"Expected no superseded phases, got {superseded}" + + +def test_superseded_phase_has_empty_output(timeout_resume_dir: Path): + """Synthesized superseded phases must have an empty output string.""" + adapter = SessionSchemaAdapter() + sample = adapter.load(timeout_resume_dir) + superseded = [phase for phase in sample.phases if phase.status == "superseded"] + assert len(superseded) == 1 + assert superseded[0].output == "" + + +def test_superseded_synthesis_respects_completion_event(soda_session_dir: Path): + """When phase_completed exists for gen-1, no superseded phase is synthesized.""" + # soda-session has phase_completed for implement gen-1 in events.jsonl + adapter = SessionSchemaAdapter() + sample = adapter.load(soda_session_dir) + impl_phases = [phase for phase in 
sample.phases if phase.name == "implement"] + # gen-1 file is implement.json.1 in the soda-session fixture + # No superseded phase should be synthesized because gen-1 completed + superseded = [phase for phase in impl_phases if phase.status == "superseded"] + assert superseded == [] From 5f3102ce8266a9abc721c40ebef27fa76b9ad7e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20=22decko=22=20de=20Brito?= Date: Tue, 26 May 2026 19:59:12 -0300 Subject: [PATCH 4/7] fix(319): sort_phases accounts for superseded phases in timeout-resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this fix, a session with implement(gen=2) but no gen-1 file would have sort keys: verify(gen=1)=(1,4) < implement(gen=2)=(2,2), placing submit and verify *before* the replacement implement(gen=2) — the wrong chronological order. The fix detects the 'superseded boundary': the highest PHASE_ORDER index of any superseded phase. Gen-1 phases whose PHASE_ORDER index lies *after* that boundary (i.e. they ran after the resumed generation) are given an effective generation of max_non_superseded_gen+1, sorting them last. Pre-superseded gen-1 phases (triage, plan) and normal rework cycles (no superseded phases) are unaffected. Also adds three TestSortPhases cases: - test_superseded_phase_sorts_before_replacement - test_superseded_does_not_affect_normal_rework_order - test_pre_superseded_gen1_phases_sort_in_gen1_block Refs: #319 --- src/raki/report/html_report.py | 39 +++++++++++++++++++++-- tests/test_report_html.py | 56 ++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 3 deletions(-) diff --git a/src/raki/report/html_report.py b/src/raki/report/html_report.py index 09974c4..0c62f8c 100644 --- a/src/raki/report/html_report.py +++ b/src/raki/report/html_report.py @@ -226,6 +226,14 @@ def sort_phases( pipeline sequence). If ``pipeline_phases`` contains names not in ``PHASE_ORDER`` (e.g. 
Alcove bridge steps), those are appended after the known phases. + + **Superseded-phase grouping** (timeout-resume pattern): when a phase at + generation 1 is marked ``status="superseded"``, all non-superseded + generation-1 phases that come *after* the superseded phase in + ``PHASE_ORDER`` are bumped to ``max_non_superseded_generation + 1`` for + sort purposes. This preserves the chronological truth: verify/review/submit + at gen-1 actually executed *after* the replacement implement(gen-2), not + before it. """ order_index: dict[str, int] = {name: idx for idx, name in enumerate(PHASE_ORDER)} if pipeline_phases: @@ -235,11 +243,36 @@ def sort_phases( order_index[name] = next_idx next_idx += 1 fallback = max(order_index.values(), default=0) + 1 - return sorted( - phases, - key=lambda phase: (phase.generation, order_index.get(phase.name, fallback)), + + # Detect the superseded-phase boundary: the highest PHASE_ORDER index of + # any superseded phase. -1 when there are no superseded phases. + superseded_boundary: int = -1 + for phase in phases: + if phase.status == "superseded": + phase_idx = order_index.get(phase.name, fallback) + if phase_idx > superseded_boundary: + superseded_boundary = phase_idx + + # The highest generation seen in non-superseded phases (used to compute + # the effective generation for post-superseded gen-1 phases). + max_non_superseded_gen: int = max( + (phase.generation for phase in phases if phase.status != "superseded"), + default=1, ) + def _sort_key(phase: PhaseResult) -> tuple[int, int]: + phase_idx = order_index.get(phase.name, fallback) + if phase.status == "superseded": + # Superseded phases sort within their own generation. + return (phase.generation, phase_idx) + if phase.generation == 1 and superseded_boundary >= 0 and phase_idx > superseded_boundary: + # Gen-1 phases that come after a superseded phase in the pipeline + # actually ran *after* the replacement generation — sort them last. 
+ return (max_non_superseded_gen + 1, phase_idx) + return (phase.generation, phase_idx) + + return sorted(phases, key=_sort_key) + def _get_metric_meta(name: str) -> dict[str, str | bool]: """Look up metadata for a metric, falling back to sensible defaults.""" diff --git a/tests/test_report_html.py b/tests/test_report_html.py index ae29ea8..5da64a3 100644 --- a/tests/test_report_html.py +++ b/tests/test_report_html.py @@ -2803,6 +2803,62 @@ def test_chronological_interleaving(self) -> None: ("review", 2), ] + def test_superseded_phase_sorts_before_replacement(self) -> None: + """Timeout-resume: impl(1,superseded) → impl(2) → verify(1) → submit(1).""" + from raki.report.html_report import sort_phases + + impl_gen1_superseded = self._make_phase("implement", generation=1, status="superseded") + impl_gen2 = self._make_phase("implement", generation=2) + verify_gen1 = self._make_phase("verify", generation=1) + submit_gen1 = self._make_phase("submit", generation=1) + + sorted_phases = sort_phases([submit_gen1, verify_gen1, impl_gen2, impl_gen1_superseded]) + names_gens_status = [(ph.name, ph.generation, ph.status) for ph in sorted_phases] + assert names_gens_status == [ + ("implement", 1, "superseded"), + ("implement", 2, "completed"), + ("verify", 1, "completed"), + ("submit", 1, "completed"), + ] + + def test_superseded_does_not_affect_normal_rework_order(self) -> None: + """Normal rework (no superseded) must keep the original (gen, phase_order) sort.""" + from raki.report.html_report import sort_phases + + impl1 = self._make_phase("implement", generation=1) + verify1_failed = self._make_phase("verify", generation=1, status="failed") + impl2 = self._make_phase("implement", generation=2) + verify2 = self._make_phase("verify", generation=2) + + sorted_phases = sort_phases([verify2, impl2, verify1_failed, impl1]) + names_gens = [(ph.name, ph.generation) for ph in sorted_phases] + assert names_gens == [ + ("implement", 1), + ("verify", 1), + ("implement", 2), + ("verify", 
2), + ] + + def test_pre_superseded_gen1_phases_sort_in_gen1_block(self) -> None: + """Gen-1 phases that come BEFORE the superseded phase stay in the gen-1 block.""" + from raki.report.html_report import sort_phases + + triage = self._make_phase("triage", generation=1) + plan = self._make_phase("plan", generation=1) + impl_superseded = self._make_phase("implement", generation=1, status="superseded") + impl2 = self._make_phase("implement", generation=2) + verify = self._make_phase("verify", generation=1) + + sorted_phases = sort_phases([verify, impl2, impl_superseded, plan, triage]) + names_gens = [(ph.name, ph.generation) for ph in sorted_phases] + assert names_gens == [ + ("triage", 1), + ("plan", 1), + ("implement", 1), # superseded — stays in gen-1 block + ("implement", 2), + ("verify", 1), # post-superseded — bumped to sort after gen-2 + ] + class TestPhaseTimelineDotColoring: """Phase status dots use correct colors: green gen-1, yellow rework (gen>1), red failed.""" From d0d5280e4d375164ad101a82362c69474516de9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20=22decko=22=20de=20Brito?= Date: Tue, 26 May 2026 20:01:09 -0300 Subject: [PATCH 5/7] style(319): add phase-status-superseded CSS rule to HTML report template MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds .phase-status-superseded { background: var(--text-muted); opacity: 0.5; border: 1px solid var(--yellow); } to the phase timeline section. The superseded dot is visually distinct: it uses the muted gray background (same hue as 'skipped') at reduced opacity, with a yellow border suggesting 'was in progress when replaced'. This lets readers immediately recognise a timed-out phase without confusing it with a normal failure. The dot_class Jinja2 expression already routes superseded phases through its 'else phase.status' branch, so no template logic change was needed — only the missing CSS rule. 
Also adds two TestPhaseTimelineDotColoring tests: - test_superseded_phase_has_superseded_dot - test_superseded_phase_css_rule_defined Refs: #319 --- src/raki/report/templates/report.html.j2 | 9 ++++---- tests/test_report_html.py | 28 ++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/raki/report/templates/report.html.j2 b/src/raki/report/templates/report.html.j2 index 84bd9aa..59f4a73 100644 --- a/src/raki/report/templates/report.html.j2 +++ b/src/raki/report/templates/report.html.j2 @@ -641,10 +641,11 @@ border-radius: 50%; } - .phase-status-completed { background: var(--green); } - .phase-status-rework { background: var(--yellow); } - .phase-status-failed { background: var(--red); } - .phase-status-skipped { background: var(--text-muted); } + .phase-status-completed { background: var(--green); } + .phase-status-rework { background: var(--yellow); } + .phase-status-failed { background: var(--red); } + .phase-status-skipped { background: var(--text-muted); } + .phase-status-superseded { background: var(--text-muted); opacity: 0.5; border: 1px solid var(--yellow); } .phase-duration { color: var(--text-muted); diff --git a/tests/test_report_html.py b/tests/test_report_html.py index 5da64a3..4805874 100644 --- a/tests/test_report_html.py +++ b/tests/test_report_html.py @@ -2937,6 +2937,34 @@ def test_skipped_phase_has_skipped_dot(self, tmp_path: Path) -> None: content = output.read_text() assert 'class="phase-status phase-status-skipped"' in content + def test_superseded_phase_has_superseded_dot(self, tmp_path: Path) -> None: + """Superseded phases should use the phase-status-superseded CSS class.""" + from raki.report.html_report import write_html_report + + report = self._make_report_with_phases( + [{"name": "implement", "generation": 1, "status": "superseded"}] + ) + output = tmp_path / "report.html" + write_html_report(report, output, include_sessions=True) + content = output.read_text() + assert 'class="phase-status 
phase-status-superseded"' in content + # Must not accidentally use rework or failed dots for superseded phases. + assert 'class="phase-status phase-status-rework"' not in content + assert 'class="phase-status phase-status-failed"' not in content + + def test_superseded_phase_css_rule_defined(self, tmp_path: Path) -> None: + """The .phase-status-superseded CSS rule must exist in the rendered HTML.""" + from raki.report.html_report import write_html_report + + report = self._make_report_with_phases( + [{"name": "implement", "generation": 1, "status": "superseded"}] + ) + output = tmp_path / "report.html" + write_html_report(report, output, include_sessions=True) + content = output.read_text() + # The CSS selector must be defined so the dot is actually styled. + assert ".phase-status-superseded" in content + # --- Ticket #250: Structured drill-down sections --- From a65040f46195729d6b8ae5dad7445f12d47ab855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20=22decko=22=20de=20Brito?= Date: Tue, 26 May 2026 20:02:33 -0300 Subject: [PATCH 6/7] =?UTF-8?q?test(319):=20regression=20guard=20=E2=80=94?= =?UTF-8?q?=20superseded=20phases=20do=20not=20trigger=20fail=20verdict?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds explicit tests confirming that PhaseResult.status='superseded' is NOT treated as status='failed' by determine_verdict() and build_detail(): - TestDetermineVerdict.test_superseded_phase_does_not_trigger_fail_verdict: A session with a superseded gen-1 implement + completed gen-2 implement must receive verdict 'rework' (not 'fail'). - TestBuildDetail.test_superseded_phase_does_not_appear_in_fail_detail: build_detail() must not produce a string containing 'failed' when the only non-completed phase is superseded. No production code change was required: the existing status == 'failed' comparisons already exclude 'superseded'. 
A Note: comment has been added to determine_verdict's docstring to make this intent explicit. Refs: #319 --- src/raki/report/html_report.py | 5 +++ tests/test_report_html.py | 60 ++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/src/raki/report/html_report.py b/src/raki/report/html_report.py index 0c62f8c..4912926 100644 --- a/src/raki/report/html_report.py +++ b/src/raki/report/html_report.py @@ -310,6 +310,11 @@ def determine_verdict(sample: EvalSample) -> Literal["pass", "rework", "fail"]: """Determine the verdict for a session sample. Logic: failed phase -> fail, rework_cycles > 0 -> rework, else pass. + + Note: ``status="superseded"`` phases (ticket #319 — timeout-resume pattern) + are intentionally NOT treated as failures. A superseded phase is an + interrupted-but-replaced attempt; only ``status="failed"`` triggers the + fail verdict. """ for phase in sample.phases: if phase.status == "failed": diff --git a/tests/test_report_html.py b/tests/test_report_html.py index 4805874..a7df144 100644 --- a/tests/test_report_html.py +++ b/tests/test_report_html.py @@ -1794,6 +1794,38 @@ def test_fail_takes_precedence_over_rework(self) -> None: sample = make_sample("s1", rework_cycles=2, verify_status="failed") assert determine_verdict(sample) == "fail" + def test_superseded_phase_does_not_trigger_fail_verdict(self) -> None: + """Superseded phases must not count as a failure for the verdict. + + Regression guard for ticket #319: status='superseded' is NOT + status='failed', so a session with only superseded phases and no + actual failures should receive verdict 'rework' (or 'pass'), never + 'fail'. 
+ """ + from raki.model.dataset import EvalSample, SessionMeta + from raki.model.phases import PhaseResult + from raki.report.html_report import determine_verdict + + meta = SessionMeta( + session_id="s-superseded", + started_at=__import__("datetime").datetime( + 2026, 5, 1, tzinfo=__import__("datetime").timezone.utc + ), + total_phases=3, + rework_cycles=1, # timeout counts as a rework + ) + phases = [ + PhaseResult(name="implement", generation=1, status="superseded", output=""), + PhaseResult(name="implement", generation=2, status="completed", output="done"), + PhaseResult(name="verify", generation=1, status="completed", output="PASS"), + ] + sample = EvalSample(session=meta, phases=phases, findings=[], events=[]) + verdict = determine_verdict(sample) + assert verdict != "fail", ( + f"Superseded phase incorrectly triggered a 'fail' verdict: got '{verdict}'" + ) + assert verdict == "rework" + class TestBuildDetail: """build_detail: "implement failed" / "2 cycles" / "5 phases".""" @@ -1822,6 +1854,34 @@ def test_detail_for_pass(self) -> None: detail = build_detail(sample) assert "phases" in detail + def test_superseded_phase_does_not_appear_in_fail_detail(self) -> None: + """build_detail must not report a superseded phase as 'failed'. + + Regression guard for ticket #319. 
+ """ + from raki.model.dataset import EvalSample, SessionMeta + from raki.model.phases import PhaseResult + from raki.report.html_report import build_detail + + import datetime + + meta = SessionMeta( + session_id="s-superseded", + started_at=datetime.datetime(2026, 5, 1, tzinfo=datetime.timezone.utc), + total_phases=3, + rework_cycles=1, + ) + phases = [ + PhaseResult(name="implement", generation=1, status="superseded", output=""), + PhaseResult(name="implement", generation=2, status="completed", output="done"), + PhaseResult(name="verify", generation=1, status="completed", output="PASS"), + ] + sample = EvalSample(session=meta, phases=phases, findings=[], events=[]) + detail = build_detail(sample) + assert "failed" not in detail, ( + f"Superseded phase caused 'failed' to appear in detail: '{detail}'" + ) + class TestComputeDuration: """_compute_duration: sum phase durations in seconds.""" From 386be6482604caaaa368b89f1649c3b16e56c01e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20=22decko=22=20de=20Brito?= Date: Tue, 26 May 2026 20:02:50 -0300 Subject: [PATCH 7/7] chore(319): add towncrier changelog fragment Refs: #319 --- changes/319.fix | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changes/319.fix diff --git a/changes/319.fix b/changes/319.fix new file mode 100644 index 0000000..5bdbacb --- /dev/null +++ b/changes/319.fix @@ -0,0 +1,5 @@ +HTML report now correctly displays timed-out (superseded) phases. Sessions where a phase +was interrupted by a timeout and restarted at a higher generation (timeout-resume pattern) +now show a synthesised ``superseded`` phase entry in the timeline with a distinct status +dot. The phase timeline is also sorted correctly: post-superseded gen-1 phases (verify, +review, submit) appear after the replacement generation rather than before it.