From e4f539fca72649cbd8ad5eb68eebcbb12ddaf8e2 Mon Sep 17 00:00:00 2001 From: grunch Date: Mon, 27 Apr 2026 19:28:49 -0300 Subject: [PATCH 1/9] [Spec Kit] Add cooperative self-resolution feature spec + plan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Feature 005 — Cooperative Self-Resolution Nudge. When Serbero classifies a mediation case as coordination_failure_resolvable with high confidence, send a templated, language-matched invitation to each party in the existing chat transport, inviting them to coordinate the resolution among themselves with an explicit human-assistance opt-in. The invitation does not name, instruct, or imply any fund-moving action; the Phase 3 policy boundary remains intact. Artifacts (specs/005-cooperative-self-resolution/): - spec.md — 14 FRs across 3 user stories (US1 cooperative happy path P1, US2 human-assistance opt-in P1, US3 no-lock-in P2), 7 success criteria, edge cases, assumptions, dependencies, out-of-scope. - plan.md — Technical Context, Constitution Check (all 13 principles pass), Project Structure (one new prompt bundle, three Rust enum variants, two config keys, one state-machine edge, six test files; no migration, no new crates). - research.md — Phase 0 design decisions and best-practice references (templates static not LLM-authored, bundle integration via existing PromptBundle loader, threshold global, one-shot guard via audit row, opt-in via classifier- output extension, summary fires immediately, threshold inclusivity, multilingual append-only). - data-model.md — Phase 1 data shapes (zero schema changes; +3 enum variants, +2 config keys, +1 state-machine edge, +1 audit payload schema). - quickstart.md — operator walkthrough per user story plus SC-007 byte-for-byte verification recipe. - contracts/template-bundle.md — bundle file format, banned-substring matrix per language, recipe for adding a language. 
- contracts/audit-events.md — sequence invariants for self_resolution_offered + reuse of summary_generated, escalation_recommended. - contracts/config.md — operator-facing TOML keys, validation, recipes, downgrade path. - contracts/classifier-output.md — additive human_requested field, prompt instruction, provider-coverage matrix, backward compat. - checklists/requirements.md — spec quality checklist (all items pass). CLAUDE.md and .specify/feature.json regenerated to reflect the new active feature (005). --- .specify/feature.json | 2 +- CLAUDE.md | 6 +- .../checklists/requirements.md | 60 ++++ .../contracts/audit-events.md | 147 ++++++++ .../contracts/classifier-output.md | 132 +++++++ .../contracts/config.md | 121 +++++++ .../contracts/template-bundle.md | 135 ++++++++ .../data-model.md | 293 ++++++++++++++++ specs/005-cooperative-self-resolution/plan.md | 196 +++++++++++ .../quickstart.md | 243 +++++++++++++ .../research.md | 309 +++++++++++++++++ specs/005-cooperative-self-resolution/spec.md | 325 ++++++++++++++++++ 12 files changed, 1966 insertions(+), 3 deletions(-) create mode 100644 specs/005-cooperative-self-resolution/checklists/requirements.md create mode 100644 specs/005-cooperative-self-resolution/contracts/audit-events.md create mode 100644 specs/005-cooperative-self-resolution/contracts/classifier-output.md create mode 100644 specs/005-cooperative-self-resolution/contracts/config.md create mode 100644 specs/005-cooperative-self-resolution/contracts/template-bundle.md create mode 100644 specs/005-cooperative-self-resolution/data-model.md create mode 100644 specs/005-cooperative-self-resolution/plan.md create mode 100644 specs/005-cooperative-self-resolution/quickstart.md create mode 100644 specs/005-cooperative-self-resolution/research.md create mode 100644 specs/005-cooperative-self-resolution/spec.md diff --git a/.specify/feature.json b/.specify/feature.json index 89034f1..d9d257d 100644 --- a/.specify/feature.json +++ b/.specify/feature.json @@ 
-1,3 +1,3 @@ { - "feature_directory": "specs/004-escalation-execution" + "feature_directory": "specs/005-cooperative-self-resolution" } diff --git a/CLAUDE.md b/CLAUDE.md index a4f3342..a21c135 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,10 +1,12 @@ # cancerbero Development Guidelines -Auto-generated from all feature plans. Last updated: 2026-04-23 +Auto-generated from all feature plans. Last updated: 2026-04-27 ## Active Technologies - Rust stable, edition 2021 (same toolchain as Phases 1/2/3). + `nostr-sdk 0.44.1` (gift-wrap transport), `mostro-core 0.9.1`, `rusqlite` (bundled, now via migration v4), `tokio` (existing runtime), `serde` + `serde_json` (HandoffPackage round-trip + DM body), `uuid` (dispatch_id v4), `tracing`, `thiserror`. No new crate pulls. (004-escalation-execution) - SQLite. One new table (`escalation_dispatches`), four new `mediation_events.kind` values. Migration v4 extends the existing migrations chain in `src/db/migrations.rs`. (004-escalation-execution) +- Rust stable, edition 2021 (same toolchain as Phases 1/2/3 and Phase 4). + `nostr-sdk 0.44.1` (gift-wrap transport, reused), `mostro-core 0.9.1`, `rusqlite` (bundled, no new migration needed), `tokio` (existing runtime), `serde` + `serde_json` (config + classifier output deserialisation), `tracing`, `thiserror`. **No new crate pulls.** The reasoning provider trait already supports the existing classification round trip; the new `human_requested: bool` field rides through it as an additive struct field. (005-cooperative-self-resolution) +- SQLite. **No migration.** The new audit event kind reuses the existing `mediation_events` table (kind is TEXT). The new policy decision variant and the new escalation trigger variant are pure Rust enum extensions. 
(005-cooperative-self-resolution) Currently shipped in `main` (Phases 1 and 2 implemented): @@ -42,10 +44,10 @@ cargo test && cargo clippy Rust (stable, edition 2021): Follow standard conventions ## Recent Changes +- 005-cooperative-self-resolution: Added Rust stable, edition 2021 (same toolchain as Phases 1/2/3 and Phase 4). + `nostr-sdk 0.44.1` (gift-wrap transport, reused), `mostro-core 0.9.1`, `rusqlite` (bundled, no new migration needed), `tokio` (existing runtime), `serde` + `serde_json` (config + classifier output deserialisation), `tracing`, `thiserror`. **No new crate pulls.** The reasoning provider trait already supports the existing classification round trip; the new `human_requested: bool` field rides through it as an additive struct field. - 004-escalation-execution: Added Rust stable, edition 2021 (same toolchain as Phases 1/2/3). + `nostr-sdk 0.44.1` (gift-wrap transport), `mostro-core 0.9.1`, `rusqlite` (bundled, now via migration v4), `tokio` (existing runtime), `serde` + `serde_json` (HandoffPackage round-trip + DM body), `uuid` (dispatch_id v4), `tracing`, `thiserror`. No new crate pulls. - 003-guided-mediation: Added Rust (stable, edition 2021) — same toolchain as Phases 1 and 2. + `nostr-sdk 0.44.1`, `mostro-core 0.9.1`, `rusqlite` (bundled), `tokio`, `serde`, `toml`, `tracing`. **New for Phase 3**: `reqwest` (HTTP client for reasoning providers), `sha2` (prompt-bundle hashing and rationale reference ids), `uuid` (session ids). 
-- main: Added Rust (stable, edition 2021) + nostr-sdk 0.44.1, mostro-core 0.9.1, rusqlite, tokio, serde, toml, tracing diff --git a/specs/005-cooperative-self-resolution/checklists/requirements.md b/specs/005-cooperative-self-resolution/checklists/requirements.md new file mode 100644 index 0000000..629a127 --- /dev/null +++ b/specs/005-cooperative-self-resolution/checklists/requirements.md @@ -0,0 +1,60 @@ +# Specification Quality Checklist: Cooperative Self-Resolution Nudge + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-04-27 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Validation Notes + +- Initial validation pass on 2026-04-27. All items pass. +- Spec deliberately avoids naming Rust types (`MediationEventKind::SelfResolutionOffered`, + `EscalationTrigger::PartyRequestedHuman`, `policy::evaluate`, etc.) and code-level + prompt file paths. Those will surface in `plan.md` and `data-model.md` during + `/speckit.plan`. 
+- The 0.75 confidence threshold default is included as a numeric value in FR-010 + because the spec explicitly says it MUST be configurable; the value is a + business-facing default, not an implementation detail. +- Three [NEEDS CLARIFICATION] candidates were considered and resolved with + reasonable defaults rather than asking the user, per the speckit "max 3 + markers, prefer informed guesses" guidance: + 1. *What happens when both parties go silent after the invitation?* — + resolved by SC-004 (silence-rate budget + revisit-phrasing trigger) and + by FR-007 (the existing solver summary still fires, so the human can + intervene). + 2. *Is the threshold global or per-classification?* — resolved as global + (FR-010), since the new branch only fires for the cooperative label + anyway, making per-label tuning moot for now. + 3. *Should the solver summary be delayed when the invitation fires?* — + resolved as "no, fire immediately" via FR-007. Audit completeness + beats solver inbox volume; solvers can filter on the + "self-resolution offered" marker. + +## Notes + +- Items marked incomplete require spec updates before `/speckit.clarify` or `/speckit.plan`. +- All checklist items currently pass; spec is ready for the next phase. diff --git a/specs/005-cooperative-self-resolution/contracts/audit-events.md b/specs/005-cooperative-self-resolution/contracts/audit-events.md new file mode 100644 index 0000000..dad7dc3 --- /dev/null +++ b/specs/005-cooperative-self-resolution/contracts/audit-events.md @@ -0,0 +1,147 @@ +# Contract: Audit-Event Surface + +This feature lands one new `MediationEventKind` variant and reuses +two existing variants on a new code path. It does **not** introduce +any new event payload that inlines rationale text (carryover from +FR-120). 
+ +## New Event: `self_resolution_offered` + +**Kind string**: `"self_resolution_offered"` +**Rust variant**: `MediationEventKind::SelfResolutionOffered` +**Emitted by**: `src/mediation/follow_up.rs`, inside the new +`SuggestSelfResolutionWithSummary` dispatch arm, **before** the +two outbound gift-wraps are published. The audit row is +committed in the same transaction as the two +`mediation_messages` outbound rows (transactional outbox: a +crash between commit and publish leaves both the audit row and +the outbound rows in place; the next tick's idempotency check +sees the audit row and skips re-firing the branch). + +**Payload**: see `data-model.md` for the full JSON shape. Recap: + +```json +{ + "session_id": "", + "classification_confidence": 0.85, + "rationale_id": "", + "languages": { + "buyer": "es", + "seller": "en" + } +} +``` + +**Companion columns** (already on `mediation_events`): + +| Column | Value | +|--------|-------| +| `session_id` | the session for which the invitation fires | +| `kind` | `'self_resolution_offered'` | +| `rationale_id` | NULL — the rationale-id reference is in the payload, NOT this column. (Existing convention: this column is reserved for rationales whose lifecycle is owned by the audit row itself; here the rationale is owned by the round-N classification call.) | +| `prompt_bundle_id` | the bundle pinned on the session at dispatch time | +| `policy_hash` | the policy hash pinned on the session at dispatch time | +| `occurred_at` | unix-secs at the moment the row is committed | + +## Reused Event: `summary_generated` + +**Kind string**: `"summary_generated"` (existing) +**Reused by**: the same dispatch arm, immediately after the +party-facing publishes succeed. The summary delivery flows +through the existing `deliver_summary` helper, which already +writes a `summary_generated` row. 
**No semantic change** to the +existing payload schema — the only delta is that the summary's +`suggested_next_step` field carries the literal value +`"self_resolution_offered_to_parties"` so a downstream consumer +(solver UI, dashboards, log search) can distinguish a +self-resolution-offered cooperative case from a vanilla one. + +The `suggested_next_step` field is part of the existing summary +payload shape; this feature only widens its **value** vocabulary +by one literal. + +## Reused Event: `escalation_recommended` + +**Kind string**: `"escalation_recommended"` (existing) +**Reused by**: the human-assistance opt-in path (User Story 2) +when `policy::evaluate(...)` short-circuits to +`Escalate(EscalationTrigger::PartyRequestedHuman)`. The existing +`escalation::recommend(...)` helper writes the row; this feature +only widens the **trigger** vocabulary by one variant +(`party_requested_human`). + +The escalation payload's `trigger` field is part of the existing +schema. No structural change. + +## Audit-Row Sequence Invariant + +For a session that takes the cooperative-invitation path, the +audit-row sequence MUST be: + +``` +1. session_opened (existing — emitted at session open) +2. classification_produced (existing — emitted on round 0 / round 1) +3. self_resolution_offered (NEW — this feature) +4. summary_generated (existing — emitted from inside deliver_summary) +5. session_closed (existing — emitted later by dispute_resolved) +``` + +Steps 3 and 4 land on the same engine tick but in separate commits: step 3 is committed before the party-facing publishes, step 4 only after they succeed. Step 5 lands later +when Mostro genuinely resolves the underlying dispute (existing +`dispute_resolved` handler). + +## Audit-Row Sequence on the Opt-In Path + +If a party opts in to human assistance after the invitation, the +sequence becomes: + +``` +1. session_opened +2. classification_produced (round 0 / round 1) +3. self_resolution_offered +4. summary_generated +5. classification_produced (round 2: human_requested = true) +6.
escalation_recommended (trigger = party_requested_human) +7. handoff_prepared (existing — Phase 4 takes over) +8. session_closed (later, via dispute_resolved) +``` + +Note that steps 3 and 4 still fire on round 1; the opt-in is +detected on round 2 and produces steps 5 and 6 in that round. + +## State Machine Edge Added + +This feature adds **one** legal transition to +`MediationSessionState::can_transition_to`: + +```rust +| (SummaryDelivered, EscalationRecommended) +``` + +Required because the opt-in path observes the +`human_requested = true` flag on a round whose session may +already have transitioned to `SummaryDelivered` (the dispatch +arm for the cooperative invitation does walk the session through +`Classified → SummaryPending → SummaryDelivered`). When the +opt-in fires from `SummaryDelivered`, the existing +`escalation::recommend(...)` helper performs the transition; the +new edge makes that transition legal. + +The reverse direction (`EscalationRecommended → SummaryDelivered`) +is NOT added — once escalated, the case stays escalated. + +## What This Feature Does NOT Audit + +- The rendered party-facing message text. The text is determined + by `(prompt_bundle_id, language)` on the audit row, which a + forensic process can replay against a frozen snapshot of the + bundle. The text is NOT inlined in any audit row, in keeping + with the FR-120 carryover from Phase 3. +- The classifier's full JSON output. The rationale-id reference + in the payload is the canonical handle into + `reasoning_rationales`, which is already the pattern for every + other audit row in the system. +- Per-party delivery success. The existing `notifications` table + already tracks per-recipient send outcomes for the solver + summary; the party-facing publishes use the same outbound chain + and inherit its retry / `Failed` row behaviour from Phase 3. 
diff --git a/specs/005-cooperative-self-resolution/contracts/classifier-output.md b/specs/005-cooperative-self-resolution/contracts/classifier-output.md new file mode 100644 index 0000000..cdd7334 --- /dev/null +++ b/specs/005-cooperative-self-resolution/contracts/classifier-output.md @@ -0,0 +1,132 @@ +# Contract: Classifier-Output Surface (Additive `human_requested` Field) + +## Scope + +This feature extends the existing classifier-call surface with one +additive field, `human_requested: bool`. The change is **additive +on the wire, additive on the Rust struct, and provider-portable**. +Both reasoning adapters in the codebase (OpenAI-compatible and +Anthropic) MUST update their prompt + parser pair to honour this +contract before the feature can ship for that provider. + +## Wire-Level JSON Schema (Round N+1, after a `self_resolution_offered` event) + +```json +{ + "classification": "coordination_failure_resolvable", + "confidence": 0.83, + "suggested_action": "summarize", + "buyer_clarification": "", + "seller_clarification": "", + "rationale": "", + "human_requested": false +} +``` + +The `human_requested` field is a plain JSON boolean. It is +**only requested by the prompt** on rounds following a +`self_resolution_offered` audit event for the session. Other +rounds may or may not include the field; if they do, its value +is ignored by `policy::evaluate(...)` (the short-circuit +documented below only fires after a `self_resolution_offered` +event has been recorded for the session). + +## Prompt-Side Instruction + +The classifier prompt for round N+1 (after the cooperative +invitation) gains, near the existing classification-instruction +block, an additional sentence and one example list: + +``` +human_requested (boolean): Set to true if and only if the latest +party reply contains an explicit, unambiguous request for a human +solver / mediator / arbitrator. 
Examples: + - "I want a human" + - "necesito un humano" + - "please escalate to a person" + - "que un humano lo revise" + - "preciso de um humano" +Vague phrasings like "this is taking too long" or "I'm frustrated" +do NOT count. When in doubt, set to false. +``` + +The "when in doubt, set to false" clause is intentional: a +false negative defers escalation by one round (the user typically +re-states); a false positive escalates to human prematurely on a +case the parties might have resolved themselves. + +## Rust-Side Parsing + +`ClassificationResponse` (`src/models/reasoning.rs`) gains: + +```rust +pub struct ClassificationResponse { + // ... existing fields ... + + #[serde(default)] + pub human_requested: bool, +} +``` + +`serde(default)` covers two scenarios: + +1. A provider that hasn't yet been updated to emit the field. + The struct deserialises with `human_requested = false`; the + opt-in path silently never fires for that provider until the + provider's prompt + parser are updated. A startup-time health- + check (R-003 in `research.md`) logs a warning when the + provider doesn't echo a probe. +2. Round 0 / round 1 responses where the prompt didn't request + the field. Same default; no false escalation. + +## Policy-Side Behaviour + +`policy::evaluate(...)` adds **one** short-circuit before the +existing classification-label dispatch: + +```rust +fn evaluate(...) -> PolicyDecision { + if classification.human_requested && session_has_self_resolution_offered(session_id) { + return PolicyDecision::Escalate(EscalationTrigger::PartyRequestedHuman); + } + // ... existing logic ... +} +``` + +The `session_has_self_resolution_offered(...)` predicate is a +cheap SQL-level existence check against `mediation_events` — +the same shape used by the new dispatch arm's one-shot guard. 
+Coupling the short-circuit to "an invitation has actually been +sent for this session" avoids two failure modes: + +- An adversarial party trying to skip mediation by emitting + human-assistance phrasing on round 0 before any invitation + fires (covered by the existing `Escalate` paths anyway, but + defence-in-depth here is cheap). +- A buggy provider that mistakenly emits `human_requested = true` + on a round where the prompt did not request the field (the + predicate guard means the field is only consulted when the + prompt would have requested it). + +## Provider Coverage + +| Provider | Required updates | Status (at plan time) | +|----------|------------------|-----------------------| +| OpenAI-compatible (PPQ.ai, OpenAI direct) | classifier prompt template emits the field on round N+1; response parser accepts it. | Not yet implemented; lands in Phase 2 tasks. | +| Anthropic | classifier prompt template emits the field on round N+1; response parser accepts it. | Not yet implemented; lands in Phase 2 tasks. | +| Future providers | Same two changes. The portability invariant is a Phase 0 constitution-check item (Principle X). | Out of scope for this feature. | + +Both adapters MUST be updated in the same PR set so a deploy +that runs Anthropic in production cannot accidentally lose the +opt-in path because the feature shipped only against the OpenAI +adapter. + +## Backward Compatibility With Old Sessions + +A session opened before this feature ships has no +`self_resolution_offered` audit row; the policy short-circuit +predicate returns false; the new field has no effect on the +session's classification rounds. The only externally observable +change for old sessions is that the `human_requested` field +appears on classifier-call traces at `debug!` level; this is +data-shape additive and does not break any existing parser. 
diff --git a/specs/005-cooperative-self-resolution/contracts/config.md b/specs/005-cooperative-self-resolution/contracts/config.md new file mode 100644 index 0000000..3ff9210 --- /dev/null +++ b/specs/005-cooperative-self-resolution/contracts/config.md @@ -0,0 +1,121 @@ +# Contract: `[mediation]` Configuration Surface (Cooperative Self-Resolution) + +## New Keys + +This feature adds two keys under the existing `[mediation]` +section of `config.toml`. Both have `serde(default)` semantics so +operators upgrading without editing their config file inherit +sensible defaults. + +| Key | Type | Default | Backed by FR / SC | +|-----|------|---------|-------------------| +| `self_resolution_threshold` | `f32` (range `0.0..=1.0`) | `0.75` | FR-010 | +| `self_resolution_enabled` | `bool` | `true` | FR-011, SC-007 | + +## Operator-facing Documentation (suggested `config.example.toml` snippet) + +```toml +[mediation] +# ... existing keys ... + +# --- Cooperative self-resolution branch (Feature 005) --- + +# Confidence floor at which Serbero invites parties to coordinate +# the resolution among themselves on a coordination_failure_resolvable +# classification. Range: 0.0..=1.0. +# +# Default 0.75 is conservative. Set higher (e.g. 0.90) to fire only +# on very-high-confidence cooperative cases. Set lower at your own +# operational risk — the feature is one of two lines of defence +# against false-positive cooperative classifications, the other +# being the static-template / no-fund-action-keyword guarantee. +# +# Setting this to 1.0 effectively disables the branch (no +# real-world classifier emits exactly 1.0); use the kill-switch +# below instead. +# +# Default: 0.75 +self_resolution_threshold = 0.75 + +# Master kill-switch for the cooperative self-resolution branch. +# When false, the branch is bypassed entirely and Serbero behaves +# byte-for-byte as it did before this feature shipped (the legacy +# cooperative-summary path runs unchanged). 
+# +# Use during incident windows or audit reviews when you want to +# force every cooperative case through human review. +# +# Default: true +self_resolution_enabled = true +``` + +## Validation + +`MediationConfig` validation extends with one check: + +```rust +if !(0.0..=1.0).contains(&cfg.self_resolution_threshold) { + return Err(...); +} +``` + +`self_resolution_enabled` is a plain bool — no range validation. + +If validation fails at startup, the daemon refuses to start with a +clear error message identifying the offending key. This matches +the existing `MediationConfig` validation pattern for similar +range-constrained f32 fields. + +## Interaction With Existing Keys + +- **`renotification_seconds`**, **`renotification_check_interval_seconds`**, + **engine tick interval**: not affected. The new branch fires on + the same engine tick that would have fired the existing + cooperative summary; no new schedule. +- **`session_timeout_seconds`** (party-unresponsive timeout): not + affected. The existing timeout still applies after a + `self_resolution_offered` event the same way it applies after + any other outbound; if both parties go silent for longer than + the configured window, the existing + `PartyUnresponsiveTimeout` escalation fires (and the SC-004 + silence-rate budget catches systemic patterns). + +## Operational Recipes + +### Disable the feature globally during an incident + +```toml +[mediation] +self_resolution_enabled = false +``` + +Restart the daemon. The legacy cooperative-summary path runs +unchanged (verified by SC-007 byte-for-byte test). + +### Tighten the trigger to very-high-confidence cases only + +```toml +[mediation] +self_resolution_threshold = 0.90 +``` + +Restart the daemon. Cases with confidence between 0.75 and 0.90 +fall through to the legacy summary-only path. + +### Roll out behind a feature flag + +`self_resolution_enabled = false` in production while the +templates and code land.
Flip to `true` in a follow-up deploy +once translation review and SC-001 / SC-002 baseline metrics are +in place. + +## Downgrade Path + +If a future deploy reverts this feature: + +1. Set `self_resolution_enabled = false` in operator config and + restart. Legacy behaviour resumes immediately. +2. Optionally roll back the binary. The `mediation_events` rows + with `kind = 'self_resolution_offered'` left behind by the + feature-on period are ignored by the legacy code (the column + is unconstrained TEXT). No DB cleanup required. diff --git a/specs/005-cooperative-self-resolution/contracts/template-bundle.md b/specs/005-cooperative-self-resolution/contracts/template-bundle.md new file mode 100644 index 0000000..f6fd94f --- /dev/null +++ b/specs/005-cooperative-self-resolution/contracts/template-bundle.md @@ -0,0 +1,135 @@ +# Contract: Self-Resolution Template Bundle + +**File**: `prompts/phase3-self-resolution.md` +**Loaded by**: `src/prompts/bundle.rs` (existing loader; this +feature adds parsing for the new file) +**Pinned via**: the existing `prompt_bundle_id` + `policy_hash` on +`mediation_sessions`. Sessions opened against bundle v1 see v1 +templates even after a v2 deploys. + +## File Format + +The file is Markdown with one section per supported language. Each +section is introduced by an ISO-639-1 code in square brackets and +contains exactly two key-value pairs. + +```markdown +# Self-Resolution Templates + +[en] +template = "Thanks for the information. The typical resolution in +this kind of case depends on an action that both parties can +coordinate themselves." +human_assistance_optin = "If you'd prefer human assistance, let me +know and I'll route you to the assigned solver." + +[es] +template = "Gracias por la información. La resolución típica en +este tipo de casos depende de una acción que ambas partes pueden +coordinar entre ustedes." +human_assistance_optin = "Si prefieres asistencia humana, dímelo y +te redirijo al solver asignado." 
+ +[pt] +template = "Obrigado pela informação. A resolução típica neste +tipo de caso depende de uma ação que ambas as partes podem +coordenar entre si." +human_assistance_optin = "Se preferires assistência humana, diz-me +e te redireciono para o solver designado." +``` + +## Rendering + +The full message a party receives is, byte-for-byte: + +``` +{template} {human_assistance_optin} +``` + +The two strings are concatenated with exactly one ASCII space. +Parties never see any framing prefix (no `Buyer:`, no `Round N.`), +consistent with the chat-scaffolding cleanup shipped earlier in +`main`. + +## Parsed Representation + +```rust +pub struct SelfResolutionTemplates { + pub by_language: HashMap<String, SelfResolutionLanguageEntry>, + pub fallback_language: String, // typically "en" +} + +pub struct SelfResolutionLanguageEntry { + pub template: String, + pub human_assistance_optin: String, +} +``` + +`fallback_language` MUST be present in `by_language`. Initial +shipping value: `"en"`. + +## Banned Substrings (FR-004 / SC-003) + +The keyword-audit unit test (`tests/phase3_self_resolution_template_audit.rs`) +loads the bundle and walks every `(language, entry)` cell, asserting +that the rendered string `format!("{} {}", template, +human_assistance_optin)` does **NOT** contain any of the following +substrings (case-insensitive, ASCII-folded for the diacritic +languages): + +| Language tag | Banned substrings | +|--------------|-------------------| +| `en` | `release`, `settle`, `cancel`, `disburse`, `transfer`, `refund`, `payout` | +| `es` | `liberar`, `liberación`, `liquidar`, `cancelar`, `transferir`, `reembolsar`, `desembolsar` | +| `pt` | `liberar`, `libertar`, `liquidar`, `cancelar`, `transferir`, `reembolsar`, `desembolsar` | + +The list is the union of "verbs that name a fund-moving action in +the Mostro / P2P-escrow domain" plus their direct cognates. New +entries land alongside any new language section. + +## Adding a New Language + +1.
Append a `[xx]` section with a translator-reviewed + `template` + `human_assistance_optin` pair. +2. Extend the banned-substring table in + `tests/phase3_self_resolution_template_audit.rs` with the + equivalents of the verbs above for the new language. +3. Run `cargo test --test phase3_self_resolution_template_audit`. + It MUST pass before the PR can be merged. + +The Rust side does not need to change for a new language; the +loader's `HashMap` keys are language codes. + +## Operator-facing Documentation + +The bundle file's first paragraph (a non-code Markdown comment) +documents the constraint stack so a future maintainer who edits a +template understands why: + +```markdown + +``` + +## Pinning and Auditability + +The `phase3-self-resolution.md` file is part of the prompt-bundle +SHA-256 hash that gets pinned on every session opened. A change +to any template — even fixing a typo — bumps the bundle hash, so +sessions in flight at deployment time continue to see the version +they were opened with, while new sessions see the new version. +This is the same pinning property that the existing Phase 3 +prompt files already enjoy; no new mechanism added. + +The audit row for `self_resolution_offered` carries the bundle id +and policy hash explicitly (existing `mediation_events` +columns), so a forensic export can pin the exact rendered text by +joining against a frozen snapshot of the bundle for that hash. diff --git a/specs/005-cooperative-self-resolution/data-model.md b/specs/005-cooperative-self-resolution/data-model.md new file mode 100644 index 0000000..a3d33d9 --- /dev/null +++ b/specs/005-cooperative-self-resolution/data-model.md @@ -0,0 +1,293 @@ +# Phase 1 Data Model: Cooperative Self-Resolution Nudge + +This feature is **strictly additive at the data layer**. There is +**no SQL migration**: every persistent surface this feature touches +already exists. 
The new shapes are pure-Rust enum variants and a +small parsed-prompt struct held in memory by the existing +`PromptBundle` loader. + +The headings below mirror the layers of the existing codebase so a +reviewer can trace each new shape back to its current owner. + +## SQL Schema + +**No changes.** + +The two persistent surfaces this feature uses both already exist: + +| Surface | Existing? | What this feature writes | +|---------|-----------|--------------------------| +| `mediation_events` (audit) | Yes (Phase 3, migration v3) | One row per session with `kind = 'self_resolution_offered'`. `kind` is an unconstrained TEXT column, so this is value-level extension only. | +| `mediation_sessions.state` | Yes | The session reaches `summary_delivered` exactly the way it does on the existing cooperative-summary path; this feature does not introduce a new state value. | + +**No new tables. No new columns. No new indexes. No migration v6.** + +## Rust Enum Extensions + +### `MediationEventKind` (in `src/db/mediation_events.rs`) + +Add one variant to the existing enum: + +```rust +pub enum MediationEventKind { + // ... existing variants ... + SelfResolutionOffered, +} +``` + +`Display` / `FromStr` on this enum already follow the +`snake_case` convention the rest of the codebase uses; the +serialised string is `"self_resolution_offered"`. + +### `EscalationTrigger` (in `src/models/escalation.rs`) + +Add one variant: + +```rust +pub enum EscalationTrigger { + // ... existing variants ... + PartyRequestedHuman, +} +``` + +Serialised string: `"party_requested_human"`. The existing +`Display` / `FromStr` impls cover this automatically per the +codebase pattern. + +### `PolicyDecision` (in `src/mediation/policy.rs`) + +Add one variant carrying the gate decision: + +```rust +pub enum PolicyDecision { + // ... existing variants ... + SuggestSelfResolutionWithSummary { + confidence: f32, + }, +} +``` + +Sibling of the existing `Summarize { classification, confidence }`. 
+Carries `confidence` so the dispatch arm can pin it on the audit +row without re-deriving it from the classifier output. + +## Rust Struct Extensions + +### `MediationConfig` (in `src/models/config.rs`) + +Add two fields with sensible defaults: + +```rust +pub struct MediationConfig { + // ... existing fields ... + + #[serde(default = "default_self_resolution_threshold")] + pub self_resolution_threshold: f32, + + #[serde(default = "default_self_resolution_enabled")] + pub self_resolution_enabled: bool, +} + +fn default_self_resolution_threshold() -> f32 { 0.75 } +fn default_self_resolution_enabled() -> bool { true } +``` + +Defaults align with FR-010 (threshold) and FR-011 (kill-switch on +by default). `serde(default)` ensures pre-existing operator +config files keep loading without explicit opt-in. + +### `ClassificationResponse` (in `src/models/reasoning.rs`) + +Add one additive field: + +```rust +pub struct ClassificationResponse { + // ... existing fields ... + + #[serde(default)] + pub human_requested: bool, +} +``` + +`serde(default)` covers two cases: an out-of-date provider that +hasn't yet been updated to emit the field, and a provider that +emits it but the round in question is round 0 (where it's +meaningless and the prompt does not request it). + +### `PromptBundle` (in `src/prompts/bundle.rs`) + +Add one parsed-template field: + +```rust +pub struct PromptBundle { + // ... existing fields ... + + pub self_resolution: SelfResolutionTemplates, +} + +pub struct SelfResolutionTemplates { + /// Keyed by ISO-639-1 language code (`en`, `es`, `pt`, …). + pub by_language: HashMap<String, SelfResolutionLanguageEntry>, + + /// Default fallback when a party's detected language is not in + /// `by_language` or confidence is below the language-detection + /// threshold. MUST be present (typically `en`). + pub fallback_language: String, +} + +pub struct SelfResolutionLanguageEntry { + pub template: String, // The cooperative-coordination text. 
+ pub human_assistance_optin: String, // The opt-in sentence appended after a single space. +} +``` + +`SelfResolutionLanguageEntry` is the unit of human-translator +review and the unit of the keyword-audit test (FR-004 / SC-003). + +The bundle hash that Phase 3 already pins on each session +(`prompt_bundle_id` + `policy_hash` columns on +`mediation_sessions`) extends automatically over the new file +because the loader sha256s the entire bundle directory. + +## Audit-Event Payload + +`mediation_events.payload_json` for the new +`self_resolution_offered` kind: + +```json +{ + "session_id": "<session-id>", + "classification_confidence": 0.85, + "rationale_id": "<rationale-id>", + "languages": { + "buyer": "es", + "seller": "en" + } +} +``` + +Field-by-field: + +- **`session_id`**: redundant with `mediation_events.session_id` + but kept in the payload so a payload-only export (e.g. a CSV + dump for forensics) is self-describing. +- **`classification_confidence`**: pinned at the moment of + dispatch; downstream debugging needs to know whether a borderline + fire correlated with cooperative resolution outcomes. +- **`rationale_id`**: SHA-256 content-hash reference into + `reasoning_rationales`. **The rationale text is NEVER inlined + here** (FR-120 / TC-103 carryover from Phase 3). +- **`languages`**: the per-party detected language code, i.e. which + template was rendered for each party. Useful for SC-004 silence-rate analysis + segmented by language. + +## State Transitions + +**No new session states.** The `mediation_sessions.state` lifecycle +graph gains exactly one edge (see **Resolution** below) and is otherwise unchanged from the post-fix version of `main`: + +``` +Opening → AwaitingResponse → Classified → SummaryPending → SummaryDelivered + ↓ + Closed + (via dispute_resolved) +``` + +This feature lands the new branch on the +`Classified → SummaryPending` transition (the same edge the +existing `Summarize` decision uses), with extra outbound side- +effects (the two party invitations) before the same +`SummaryPending → SummaryDelivered` edge fires. 
+ +The session never re-enters `AwaitingResponse` after the +invitation. The opt-in escalation path (US2) is intended to take a +`SummaryDelivered → … → EscalationRecommended` route via the +existing `dispute_resolved` / escalation-recommend pipeline; this +matches the carryover from the recent fix in `main` that closes +`summary_delivered` sessions on dispute resolution. + +> **Design note**: that path is not legal today. `SummaryDelivered → +> EscalationRecommended` is **NOT** in the existing legal +> transitions list (`SummaryDelivered → Closed` is the only edge +> out of `SummaryDelivered`). One alternative considered was to have the opt-in escalation +> apply to a session **before** it reaches `SummaryDelivered` — +> i.e. the round-N+1 classification arrives while the session is +> still in `AwaitingResponse`, and the escalation fires from there +> via the standard `AwaitingResponse → EscalationRecommended` edge +> that already exists. + +The candidate transition for User Story 2 under that alternative (rejected; see **Resolution** below): + +``` +AwaitingResponse ─(self_resolution_offered fired in round N)→ AwaitingResponse + ─(round N+1 classification: human_requested = true)→ + EscalationRecommended → Closed +``` + +This would only work if the new branch did **not** flip the session to +`SummaryDelivered` immediately. In fact it transitions +`AwaitingResponse → Classified → SummaryPending → SummaryDelivered` +inside the dispatch arm, so the round N+1 path would have to run **before** +the round-N dispatch lands `SummaryDelivered`. (See +`contracts/audit-events.md` for the precise ordering invariants +the implementation must hold.) + +> The Phase 1 design intentionally surfaces this invariant here so +> the implementing engineer doesn't accidentally land the +> `SummaryPending → SummaryDelivered` edge before checking the +> opt-in. 
The simplest correct implementation: the new dispatch +> arm uses the existing `deliver_summary` helper, which already +> transitions through `SummaryPending → SummaryDelivered` only +> *after* both parties' invitations have been published; the +> opt-in detection runs on the next tick's classification call, +> by which point the session has reached `SummaryDelivered` and +> the escalation must therefore *also* be reachable from there. + +**Resolution**: extend `MediationSessionState::can_transition_to` +with one new edge: `SummaryDelivered → EscalationRecommended`. The +edge is only taken when `policy::evaluate(...)` emits +`Escalate(PartyRequestedHuman)` on a session that's already in +`SummaryDelivered`. Audit-trail-wise this is exactly the same +shape as the existing `EscalationRecommended → Closed` edge taken +later by `dispute_resolved`. + +(See `contracts/audit-events.md` for the audit-row sequence the +state-machine extension produces.) + +## Configuration File Format + +`config.toml` additions, surfacing the new keys with their +defaults: + +```toml +[mediation] +# ... existing keys ... + +# Confidence floor at which Serbero invites parties to coordinate +# the resolution among themselves on a coordination_failure_resolvable +# classification. Range: 0.0..=1.0. Higher = more conservative +# (fewer invitations, fewer false positives). Lower = more +# permissive. Default: 0.75. +self_resolution_threshold = 0.75 + +# Master kill-switch. When false, the cooperative self-resolution +# branch is bypassed entirely and Serbero behaves byte-for-byte as +# it did before this feature shipped. Default: true. +self_resolution_enabled = true +``` + +See `contracts/config.md` for the operator-facing documentation +shape. + +## Summary of Persistence-Layer Impact + +| Layer | Change | +|-------|--------| +| SQLite migrations | None. Migration count stays at v5 (Phase 4). | +| `mediation_events.kind` values | +1 (`self_resolution_offered`). 
| +| Rust enums | +3 variants (one each on `MediationEventKind`, `EscalationTrigger`, `PolicyDecision`). | +| Rust structs | +3 fields on existing structs (two on `MediationConfig`, one on `ClassificationResponse`); +1 new field on `PromptBundle` carrying parsed templates. | +| State machine | +1 edge: `SummaryDelivered → EscalationRecommended`, taken only on the human-assistance opt-in path. | +| Audit-payload schemas | +1 (`self_resolution_offered` payload, no rationale text inlined). | +| Config file | +2 keys under `[mediation]`, both with sensible serde defaults. | +| Prompt bundle | +1 file (`phase3-self-resolution.md`); existing files referenced get amendments documented under `prompts/` in `plan.md`. | diff --git a/specs/005-cooperative-self-resolution/plan.md b/specs/005-cooperative-self-resolution/plan.md new file mode 100644 index 0000000..c5fd5f0 --- /dev/null +++ b/specs/005-cooperative-self-resolution/plan.md @@ -0,0 +1,196 @@ +# Implementation Plan: Cooperative Self-Resolution Nudge + +**Branch**: `005-cooperative-self-resolution` | **Date**: 2026-04-27 | **Spec**: [spec.md](./spec.md) +**Input**: Feature specification from `/specs/005-cooperative-self-resolution/spec.md` + +## Summary + +Extend the Phase 3 mediation pipeline with a single new policy branch +that fires when classification is `coordination_failure_resolvable`, +suggested action is `summarize`, confidence ≥ a configurable +threshold (default 0.75), and no prior `self_resolution_offered` +event exists for the session. The branch dispatches **two outputs in +parallel**: + +1. **Two party-facing messages** through the existing chat transport, + one to the buyer and one to the seller, in their detected + languages, drawn from a **static repository-hosted template + bundle** (`prompts/phase3-self-resolution.md`). The bundle lives + alongside the existing prompt artifacts and is loaded at startup + into the same `PromptBundle` infrastructure used by the rest of + Phase 3. 
The model decides only WHETHER to fire the branch; it + never authors the party-facing text. +2. **The existing structured solver summary** through the existing + `mediation_summary` notifier path, with `suggested_next_step = + "self_resolution_offered_to_parties"` so the solver can + distinguish a self-resolution-offered case from a vanilla + cooperative summary at a glance. + +Two new audit / control surfaces ride on the existing infrastructure: + +- A new `MediationEventKind::SelfResolutionOffered` variant + (audit-only — no schema change; the `mediation_events.kind` column + is already an unconstrained TEXT field). +- A new `EscalationTrigger::PartyRequestedHuman` variant emitted by + `policy::evaluate` when the classifier on a follow-up round flags + an explicit human-assistance request from a party. + +Configuration is two new keys under `[mediation]` — +`self_resolution_threshold` (f32, default 0.75) and +`self_resolution_enabled` (bool, default true; kill-switch). With +the kill-switch off, the pipeline behaves byte-for-byte the same as +it does today (SC-007). + +The feature is **strictly additive**: no DB migration, no changes to +the existing `Summarize` decision path beyond adding a new sibling +variant, no changes to the session lifecycle (the session still +ends at `summary_delivered`, exactly as the recently-shipped fix in +`main` left it). + +## Technical Context + +**Language/Version**: Rust stable, edition 2021 (same toolchain as Phases 1/2/3 and Phase 4). +**Primary Dependencies**: `nostr-sdk 0.44.1` (gift-wrap transport, reused), `mostro-core 0.9.1`, `rusqlite` (bundled, no new migration needed), `tokio` (existing runtime), `serde` + `serde_json` (config + classifier output deserialisation), `tracing`, `thiserror`. **No new crate pulls.** The reasoning provider trait already supports the existing classification round trip; the new `human_requested: bool` field rides through it as an additive struct field. +**Storage**: SQLite. 
**No migration.** The new audit event kind reuses the existing `mediation_events` table (kind is TEXT). The new policy decision variant and the new escalation trigger variant are pure Rust enum extensions. +**Testing**: `cargo test` + the existing integration-test harness (`nostr-relay-builder::MockRelay`, `common::SolverListener`, in-memory rusqlite, scripted `MockReasoningProvider`). No new test dependency. Six new test binaries under `tests/` (one per user-story slice, the one-shot and kill-switch edge cases, and a keyword-audit unit test for the templates). +**Target Platform**: Linux server (the daemon is already Linux-targeted; no platform-specific code added). +**Project Type**: Single-binary Rust daemon — the existing `serbero` binary gains one new prompt bundle file, a small set of model / policy enum extensions, and one new dispatch arm in `mediation::follow_up`. +**Performance Goals**: SC-005 — "session reaches the assigned solver within one engine cycle" on the human-assistance opt-in path is met automatically by reusing the existing escalation pipeline, which already operates at one-engine-cycle latency. No new perf budget needed. +**Constraints**: SC-007 (zero externally observable behaviour change with kill-switch off) demands the new branch be gated cleanly so it cannot fire when `self_resolution_enabled = false`. FR-004 / SC-003 require an automated fund-action keyword check on every change to the template bundle. FR-009 + FR-120 (carried over from Phase 3) require the audit row to reference rationale-id only, never inline rationale text. +**Scale/Scope**: Same scale as the existing Phase 3 mediation pipeline. The new branch fires at most once per session and adds two outbound gift-wrap publishes (already the cost of any `AskClarification` round) plus one extra audit-event row. Negligible incremental load. + +## Constitution Check + +*GATE: Must pass before Phase 0 research. 
Re-check after Phase 1 design.* + +All 13 Serbero-constitution principles are satisfied by the +cooperative self-resolution spec. Mapping below is the evidence +examined at this gate; all references are to FRs and SCs in +`spec.md`. + +| # | Principle | Compliance source | +|---|-----------|-------------------| +| I | Fund Isolation First | FR-004 — invitation text MUST NOT name, instruct, suggest, or imply any fund-moving action; verified by an automated keyword check (SC-003). The feature does not grant Serbero any new permission, does not call admin-settle / admin-cancel, and does not move funds. | +| II | Protocol-Enforced Security Boundaries | FR-003 — invitation text comes from static repository templates, NOT from the LLM. The model only chooses *whether* to fire the branch; if it picked the wrong classification, the existing `Escalate` paths fire normally. The boundary that "Serbero must not author fund-action text" is enforced structurally by the template-only delivery, not by an LLM-prompt restriction. | +| III | Human Final Authority | FR-008 + US2 — explicit human-assistance opt-in escalates through the existing handoff pipeline. FR-007 — solver always receives the summary in parallel so they can override the cooperative path. US3 — non-cooperative classifications on later rounds escalate normally; the invitation does not lock the session into "cooperative limbo". | +| IV | Operator Notification Is a Core Responsibility | FR-007 — the existing solver-facing summary still fires unchanged, with `suggested_next_step = "self_resolution_offered_to_parties"` as the only delta. No solver visibility is lost compared to today's cooperative-summary path. | +| V | Assistance Without Authority | FR-005 — invitations end with a single sentence offering human assistance; phrasing avoids any framing as a binding decision. The session lifecycle (FR-013) and dispute closure remain untouched by Serbero — Mostro continues to own those. 
| +| VI | Auditability by Design | FR-009 — every dispatch writes an auditable event with session id, classification confidence, prompt-bundle version. FR-120 / TC-103 carryover: the rationale text is stored once in `reasoning_rationales` and referenced by id; the audit row never inlines it. | +| VII | Graceful Degradation | FR-011 — `[mediation].self_resolution_enabled = false` disables the entire branch without removing templates or modifying code; SC-007 — externally observable behaviour with the kill-switch off is byte-for-byte identical to the pre-feature pipeline. | +| VIII | Privacy by Default | The party-facing invitation reuses the existing per-party gift-wrap envelope (already minimum-disclosure). The solver-facing summary is the same one shipped today; no new audience. The audit row references rationale id, not text. | +| IX | Nostr-Native Coordination | Invitations are dispatched through the existing `chat::outbound` gift-wrap path. No new transport, no new relay subscription, no new event kind. | +| X | Portable Reasoning Backends | The new `human_requested: bool` classifier-output field is a plain JSON boolean; both the OpenAI-compatible adapter and the Anthropic adapter add it as an additive deserialiser field with the same default (`false` when absent), so each provider remains independently shippable. | +| XI | Incremental Scope and Clear Boundaries | Strictly additive: one new policy branch, one new event kind, one new escalation trigger, two config keys, one new prompt-bundle file. Zero changes to Phase 1/2/4 surfaces. The "Out of Scope" section of the spec explicitly forbids touching any non-cooperative classification's flow. | +| XII | Honest System Behavior | The invitation phrasing does not assert any factual claim about the dispute (it describes a *typical* coordination pattern, not a verdict). When confidence is below threshold, the path is silent (FR-012); when classification shifts non-cooperative, the standard escalation fires (US3). 
| +| XIII | Mostro Compatibility and Separation of Concerns | Serbero never invokes admin-settle / admin-cancel / TakeDispute as part of this feature. The opt-in escalation routes to a *human* solver who runs TakeDispute on their own Mostro instance. The fund-state authority remains entirely in Mostro. | + +**Decision**: GATE PASSES. No Complexity Tracking entries needed. + +**Post-design re-evaluation (after Phase 1 artifacts)**: data-model.md +(zero schema changes, three new Rust enum variants, two new config +keys, plus one additive state-machine edge `SummaryDelivered → +EscalationRecommended` to support the human-assistance opt-in +firing after a session has reached `SummaryDelivered`), +contracts/template-bundle.md (template-only authoring, +keyword-check obligation, language extensibility rules), +contracts/audit-events.md (`self_resolution_offered` payload pinned +to rationale-id reference), contracts/config.md (kill-switch + +threshold defaults), contracts/classifier-output.md (additive +`human_requested` field), and quickstart.md (SC-007 byte-for-byte +regression check on kill-switch-off path) collectively introduce +no new constitutional violations. The new state-machine edge is +purely additive and does not weaken any existing invariant; the +escalation pipeline reached from `SummaryDelivered` runs through +the existing `escalation::recommend(...)` helper unchanged. Gate +still PASSES. 
+ +## Project Structure + +### Documentation (this feature) + +```text +specs/005-cooperative-self-resolution/ +├── plan.md # This file (/speckit.plan command output) +├── research.md # Phase 0 output (/speckit.plan command) +├── data-model.md # Phase 1 output (/speckit.plan command) +├── quickstart.md # Phase 1 output (/speckit.plan command) +├── contracts/ # Phase 1 output (/speckit.plan command) +│ ├── template-bundle.md # Static template bundle shape, language extensibility, keyword-check obligation +│ ├── audit-events.md # `self_resolution_offered` payload + `superseded_by_human` interaction +│ ├── config.md # `[mediation].self_resolution_*` TOML keys + defaults +│ └── classifier-output.md # Additive `human_requested` field on the existing classification JSON +├── checklists/ +│ └── requirements.md # Spec quality checklist (/speckit.specify output) +└── tasks.md # Phase 2 output (/speckit.tasks command - NOT created here) +``` + +### Source Code (repository root) + +This feature is a Phase 3 extension. It adds one new prompt-bundle +file under `prompts/`, a handful of additive model/policy enum +variants, and one new dispatch arm inside the existing +`src/mediation/follow_up.rs`. There is **no new top-level module** +because the surface is too small to justify one — the scope is "one +extra branch on the existing classify→summarize path". + +```text +prompts/ +├── phase3-system.md # MODIFIED: amend the "Allowed" list to include `self_resolution_offered` as an allowed output type, restate the unchanged fund-action prohibition. No structural rewrite. +├── phase3-self-resolution.md # NEW: static template bundle. Sections per supported language ([en], [es], [pt] initially) with `template` + `human_assistance_optin` strings. +├── phase3-classification.md # MODIFIED: classifier prompt gains `human_requested: bool` in the JSON-output schema; round-N+1 instructions describe when to set it. 
+├── phase3-escalation-policy.md # MODIFIED: document the new `party_requested_human` trigger. +└── (other files) # (unchanged) + +src/ +├── chat/ +│ └── outbound.rs # (unchanged — existing `build_wrap_with_audience` already supports per-party `m-aud` tag) +├── db/ +│ ├── mediation_events.rs # MODIFIED: extend `MediationEventKind` enum with `SelfResolutionOffered`. NO migration (kind is TEXT). +│ └── (other files) # (unchanged) +├── mediation/ +│ ├── policy.rs # MODIFIED: add `PolicyDecision::SuggestSelfResolutionWithSummary { confidence }`. Extend `evaluate(...)` so that on `(CoordinationFailureResolvable, Summarize)` with confidence ≥ threshold AND no prior `self_resolution_offered` audit row AND `self_resolution_enabled=true`, the new variant is returned. Otherwise fall through to the existing `Summarize` branch. +│ ├── policy.rs (cont.) # MODIFIED: add the human-requested short-circuit — when `classification.human_requested == true`, emit `Escalate(EscalationTrigger::PartyRequestedHuman)` regardless of label. +│ ├── follow_up.rs # MODIFIED: add the dispatch arm for the new `PolicyDecision::SuggestSelfResolutionWithSummary` variant. The arm renders the templates per detected party language, calls the existing `chat::outbound::send_chat_message_with_audience` path, then delegates to the existing `deliver_summary` with `suggested_next_step = "self_resolution_offered_to_parties"`. +│ ├── self_resolution.rs # NEW (small file): the template-renderer + per-party language picker. Pure-function module so the keyword-audit test can exercise it without spinning up a session. +│ └── (other files) # (unchanged — session lifecycle stays at `summary_delivered`) +├── models/ +│ ├── config.rs # MODIFIED: extend `MediationConfig` with `self_resolution_threshold: f32` (serde default 0.75) and `self_resolution_enabled: bool` (serde default true). The existing config-loading pipeline picks them up automatically. 
+│ ├── escalation.rs # MODIFIED: extend `EscalationTrigger` enum with `PartyRequestedHuman`. +│ ├── reasoning.rs # MODIFIED: add `human_requested: bool` (serde default false) on the classification-response struct so both reasoning adapters parse the field uniformly. +│ └── (other files) # (unchanged) +├── prompts/ +│ └── (loader; reads the new bundle file) # MODIFIED: add a field to `PromptBundle` carrying the parsed self-resolution templates; bundle hash updated automatically by the existing sha256 over all files in the bundle. +├── reasoning/ +│ ├── openai.rs # MODIFIED: classifier prompt template emits the additional `human_requested` JSON instruction (round N+1 only); response parser reads the new field. Uses the existing `extract_json_object` + `parse_classification` plumbing — no new branches in the retry loop. +│ ├── anthropic.rs # MODIFIED: same JSON-output addition for the Anthropic adapter to keep portability (Principle X). +│ └── (other files) # (unchanged) +├── lib.rs # MODIFIED only if a crate-root re-export is needed: the module itself is declared as `pub mod self_resolution;` in `src/mediation/mod.rs` (a path such as `pub mod mediation::self_resolution;` is not valid Rust) +└── (other files) # (unchanged) + +tests/ +├── common/mod.rs # (unchanged — existing harness covers everything) +├── phase3_self_resolution_happy_path.rs # NEW: US1 — cooperative classification with high confidence dispatches both party invitations + solver summary; session ends at `summary_delivered`. +├── phase3_self_resolution_opt_in.rs # NEW: US2 — after invitation, classifier returns `human_requested = true`, session escalates with trigger `party_requested_human`. +├── phase3_self_resolution_no_lock_in.rs # NEW: US3 — after invitation, next round returns `ConflictingClaims`; session escalates under the standard `conflicting_claims` trigger. +├── phase3_self_resolution_one_shot.rs # NEW: edge case — repeated cooperative classifications never re-fire the invitation (FR-006 / SC-006). 
+├── phase3_self_resolution_kill_switch.rs # NEW: SC-007 — with `self_resolution_enabled = false`, the path is invisible; behaviour matches the pre-feature pipeline byte-for-byte (party-message count, audit-row count, solver-summary count). +└── phase3_self_resolution_template_audit.rs # NEW: unit test (no DB, no relay) — load the template bundle, walk every (language, section) cell, assert no banned fund-action keyword occurs in any rendered string. Backs FR-004 / SC-003. +``` + +**Structure Decision**: The existing Phase 1/2/3/4 layout is a +single-project Rust daemon with feature-scoped module trees under +`src/`. This feature does **not** introduce a new top-level module — +the scope is one extra policy branch and one extra dispatch arm, +which fits naturally inside `src/mediation/`. The only new file +under `src/mediation/` is `self_resolution.rs`, a small pure-function +module that owns template rendering and per-party language picking +so the keyword-audit unit test can exercise it without spinning up +a full session. Tests live as top-level integration files under +`tests/` following the established `phase3_*.rs` naming pattern, +one per user-story slice plus the edge-case binaries listed above. + +## Complexity Tracking + +> **Fill ONLY if Constitution Check has violations that must be justified** + +No constitution violations. Table intentionally left empty. diff --git a/specs/005-cooperative-self-resolution/quickstart.md b/specs/005-cooperative-self-resolution/quickstart.md new file mode 100644 index 0000000..52fd620 --- /dev/null +++ b/specs/005-cooperative-self-resolution/quickstart.md @@ -0,0 +1,243 @@ +# Quickstart: Cooperative Self-Resolution Nudge + +This document is the operator / reviewer's "what should I expect to +see when this feature is on" guide. It complements `spec.md` (the +business-facing description) and `plan.md` (the implementation +shape) with a hands-on walkthrough that mirrors each user story +end-to-end. 
+ +## Prerequisites + +Before this feature can be exercised: + +- **`main` carries the `summary_delivered` lifecycle fix** that + defers `summary_delivered → closed` to the `dispute_resolved` + handler. Without it, the engine reopens duplicate sessions + mid-coordination and the cooperative invitation becomes + ineffective. (Already shipped in PR #47.) +- **A reasoning provider that emits the `human_requested` field + on round N+1**. As of this feature's plan date neither + adapter (OpenAI-compatible, Anthropic) emits it; a Phase 2 + task in this feature ships that update. +- **An updated `prompts/phase3-self-resolution.md` bundle file** + with at minimum `[en]` populated. Templates land in the same PR + as the code; the keyword-audit unit test refuses to merge a + bundle with banned substrings. + +## Configuration (operator-side) + +Two new keys under `[mediation]` in `config.toml`: + +```toml +[mediation] +self_resolution_threshold = 0.75 # (default) +self_resolution_enabled = true # (default; set false to kill-switch the feature) +``` + +See `contracts/config.md` for the full operator-facing notes. + +## User Story 1 — Cooperative resolves without solver action + +### What you'd see in the daemon log + +``` +INFO serbero::mediation::policy: classify_for_round: rationale persisted + session_id=… classification=coordination_failure_resolvable + confidence=0.85 rationale_id=… +DEBUG serbero::mediation::policy: evaluate: SuggestSelfResolutionWithSummary + session_id=… confidence=0.85 +INFO serbero::mediation::self_resolution: invitation dispatched to both parties + session_id=… buyer_lang=es seller_lang=en +INFO serbero::mediation: solver_summary_delivered + session_id=… suggested_next_step=self_resolution_offered_to_parties +INFO serbero::mediation::session: session reached summary_delivered + session_id=… +``` + +### What the parties see + +Each party receives **one** gift-wrap message in their detected +language. 
The buyer sees the `[es]` (or detected language) template; +the seller sees the `[en]` (or detected language) template; both +end with the human-assistance opt-in sentence. No `Buyer:` or +`Round N.` prefixes — the chat-scaffolding cleanup already shipped +in `main` ensures that. + +### What the assigned solver sees + +Exactly **one** `mediation_summary` notification, identical in +structure to today's cooperative-summary DM, with one delta: + +``` +suggested_next_step: self_resolution_offered_to_parties +``` + +A solver UI / dashboard / log search filtering on this literal can +distinguish a self-resolution-offered case from a vanilla +cooperative summary at a glance. + +### What the audit table records + +```sql +SELECT kind, occurred_at FROM mediation_events +WHERE session_id = '<session-id>' ORDER BY occurred_at; +``` + +Should show, in order: + +``` +session_opened +classification_produced +self_resolution_offered <-- NEW +summary_generated +session_closed (later, when dispute_resolved fires) +``` + +### Verifying SC-006 (one-shot) + +Run the engine for a session that produces a second +`coordination_failure_resolvable` classification with high +confidence on a later round. The audit table MUST contain +**exactly one** `self_resolution_offered` row. The new branch +short-circuits via the existence check. + +## User Story 2 — Party opts in to human assistance + +### Setup + +Continue the User Story 1 session. After the invitation lands, +seed a buyer reply such as `"necesito un humano que revise esto"` +and let the engine ingest it. 
+ +### What you'd see in the daemon log + +``` +DEBUG serbero::reasoning::openai: openai reasoning call response + attempt=0 model=… content_len=… content_sha256_prefix=… +INFO serbero::mediation::policy: evaluate: human_requested=true + session_id=… → Escalate(party_requested_human) +INFO serbero::mediation::escalation: recommend: party_requested_human + session_id=… handoff_event_id=… +INFO serbero::escalation::dispatcher: dispatched escalation + session_id=… solver=… +``` + +### What the audit table records (additionally) + +``` +classification_produced (round N+1) +escalation_recommended (trigger=party_requested_human) +handoff_prepared +``` + +The `handoff_prepared` row is consumed by the existing Phase 4 +dispatcher; a write-permission solver receives the handoff DM. + +### Verifying SC-005 (one-engine-cycle latency) + +Time from "buyer reply ingested" to "escalation_recommended row +committed" should be ≤ one engine tick. The cooperative invitation +adds zero latency to this path because it routes through the +existing escalation pipeline. + +## User Story 3 — No lock-in on cooperative branch + +### Setup + +Continue the User Story 1 session. After the invitation lands, +seed a buyer reply such as `"the seller never released, they +lied"` and let the engine ingest it. + +### What you'd see in the daemon log + +``` +INFO serbero::mediation::policy: classify_for_round + session_id=… classification=conflicting_claims confidence=0.92 +INFO serbero::mediation::policy: evaluate + session_id=… → Escalate(conflicting_claims) +INFO serbero::mediation::escalation: recommend: conflicting_claims +``` + +The session escalates under the standard `conflicting_claims` +trigger, **not** under `party_requested_human`. The previous +`self_resolution_offered` event does not bias the new round's +classification. 
+ +## SC-007 Verification: Kill-switch off, byte-for-byte legacy + +To confirm the kill-switch is honoured: + +```toml +[mediation] +self_resolution_enabled = false +``` + +Restart the daemon. Run two side-by-side sessions: + +1. A session with the feature on (`self_resolution_enabled = true`) + that triggers a high-confidence cooperative classification. +2. A session with the feature off (`self_resolution_enabled = + false`) that triggers the same classification. + +Diff the audit-event rows for the two sessions: + +``` +session 1: session_opened, classification_produced, + self_resolution_offered, summary_generated, … +session 2: session_opened, classification_produced, + summary_generated, … +``` + +The only difference MUST be the absence of +`self_resolution_offered` in session 2. The +`summary_generated` payload's `suggested_next_step` field for +session 2 must carry the legacy value (whatever today's +cooperative-summary path uses), not +`"self_resolution_offered_to_parties"`. + +## Running the keyword-audit test + +``` +cargo test --test phase3_self_resolution_template_audit +``` + +Should pass on every PR. If it fails, the bundle file contains a +banned fund-action keyword in some language section; fix the +template before merging. + +## Dry-run for a new language section + +When adding `[de]` (or any new language): + +1. Append the section to `prompts/phase3-self-resolution.md`. +2. Extend the banned-substring map in + `tests/phase3_self_resolution_template_audit.rs` with the + German equivalents (`freigeben`, `bezahlen`, `abbrechen`, + `überweisen`, `erstatten`, `auszahlen`, …). +3. Run `cargo test --test phase3_self_resolution_template_audit` + locally. +4. Open the PR with both files in the same change set so a + reviewer can confirm the audit covers the new translation. + +## Forensic Replay + +To reconstruct what a party saw on a past +`self_resolution_offered` event: + +1. 
Look up the event row: + + ```sql + SELECT prompt_bundle_id, policy_hash, payload_json + FROM mediation_events + WHERE kind = 'self_resolution_offered' AND session_id = '<session-id>'; + ``` + +2. The `prompt_bundle_id` plus `policy_hash` pin the exact bundle + version. Check out the corresponding commit and re-render the + message for the language code in `payload_json.languages.<party>`. +3. The rendered string is exactly what the party received (no + per-session interpolation). + +This replay path is the same one used by the existing Phase 3 +forensic process for `summary_generated` and other audit rows; +no new tooling required. diff --git a/specs/005-cooperative-self-resolution/research.md b/specs/005-cooperative-self-resolution/research.md new file mode 100644 index 0000000..81b8bb2 --- /dev/null +++ b/specs/005-cooperative-self-resolution/research.md @@ -0,0 +1,309 @@ +# Phase 0 Research: Cooperative Self-Resolution Nudge + +## Open Questions Resolved + +The spec was authored without `[NEEDS CLARIFICATION]` markers (all +informed-default decisions documented in the requirements +checklist). Phase 0 research therefore focuses on **best-practice +investigation** for the dependencies and patterns this feature +touches, plus a deeper look at the policy-boundary question that +emerged in the original conversation that motivated this feature. + +--- + +## Decision: Static repository templates over LLM-authored party text + +**Decision**: The party-facing invitation comes from a static +`prompts/phase3-self-resolution.md` file checked into the repo, +parsed at startup into the existing `PromptBundle`. The LLM decides +**only** whether to fire the branch (via classification + confidence); +it never authors any user-visible text on this code path. + +**Rationale**: + +- **Constitution Principle II — Protocol-Enforced Security + Boundaries**: a security-critical boundary (no fund-action + language) MUST NOT depend on an LLM behaving correctly.
Static + templates make the boundary structural: a policy violation can + only land if a human commits a bad template, and the keyword-audit + test (FR-004 / SC-003) catches that on every change. +- **Auditability (Principle VI)**: identical template rendered each + time means the audit row only needs to capture which template + version (via prompt-bundle hash) and which language was rendered; + it does not need to inline the rendered text. +- **Translation review**: a static bundle is reviewable by a human + translator once per language before shipping, and updates land as + reviewable diffs. +- **Reproducibility for tests**: integration tests can assert + byte-equality against the rendered output rather than fuzzy-match + LLM completions. + +**Alternatives considered**: + +- *LLM-authored invitation with a system-prompt restriction*: + rejected — violates Principle II (the boundary would depend on + the LLM honouring the system prompt under prompt-injection + conditions). The existing Phase 3 `authority_boundary_attempt` + trigger already escalates when an LLM emits fund-action wording in + free-form output, but using that as the *only* line of defence + for a feature whose entire purpose involves party-facing text + was deemed too risky. +- *LLM-authored with a runtime keyword-strip / sanitiser pass*: + rejected — keyword stripping is bypassable by paraphrase; doesn't + meaningfully tighten the boundary. + +--- + +## Decision: Bundle integration via the existing `PromptBundle` loader + +**Decision**: Add `self_resolution: SelfResolutionTemplates` (or +similar) as a field on the existing `PromptBundle` struct. The new +`prompts/phase3-self-resolution.md` is parsed by the existing bundle +loader (which currently reads system / classification / escalation / +mediation-style / message-templates Markdown files). The bundle +hash that the engine pins on each session is recomputed +automatically over the new file too. 
+ +**Rationale**: + +- **Single source of truth for prompt-policy versioning**: a bundle + hash that already captures everything Serbero is allowed to say + to anyone is the right scope. Sessions opened against bundle v1 + see v1 templates even after a v2 deployment, which matches the + existing Phase 3 prompt-pinning contract. +- **Zero new operator surface**: operators don't need a new config + knob to point at a templates file; the loader already knows the + bundle path from `[prompts]`. +- **Reuse of the existing audit chain**: the audit row references + `bundle_id` + `policy_hash`, which already exist; no new audit + fields needed. + +**Alternatives considered**: + +- *Inline templates inside `phase3-message-templates.md`*: rejected + — that file currently scopes to clarification-question templates, + which is a different policy class (model-authored, parameterised + text). Self-resolution templates are model-untouched, so a + separate file is cleaner. +- *Templates as Rust string constants in code*: rejected — defeats + the "translator can review without a Rust toolchain" goal and + makes adding a language a code-change rather than a prompts-change. + +--- + +## Decision: Confidence threshold global (not per-classification) + +**Decision**: One f32 config key `self_resolution_threshold` +(default 0.75) gates the new branch. The threshold is not +per-classification. + +**Rationale**: the new branch only fires for one classification label +(`coordination_failure_resolvable`); per-label tuning is moot until +a future feature opens additional cooperative-style branches for +other labels. When that happens, the threshold can be promoted to a +map keyed by classification label — the migration is purely additive +on the config struct. + +**Alternatives considered**: + +- *Per-classification threshold map*: rejected — premature + generalisation. 
+- *No threshold (always fire on cooperative)*: rejected — even a + single false-positive cooperative classification on a fraud- + adjacent case is operationally expensive. The threshold is a + cheap safety budget. + +--- + +## Decision: One-shot guard via prior `self_resolution_offered` audit row + +**Decision**: The guard against re-firing the invitation on a later +round is a SQL existence check against `mediation_events` for a row +with `kind = 'self_resolution_offered'` and matching `session_id`. +The check runs inside `policy::evaluate(...)` (or its caller in +`mediation::follow_up`) before the new branch is selected. + +**Rationale**: + +- **Crash-safety**: the audit row is committed in the same + transaction as the outbound rows. A crash between commit and + publish leaves both the audit row and the outbound rows in place; + on the next tick the guard sees the audit row and skips the + branch correctly. +- **Reuse of existing dedup pattern**: the same shape is used + today by Phase 3 for several once-per-session events (e.g. + `session_opened`, `summary_generated`). + +**Alternatives considered**: + +- *In-memory flag on the session struct*: rejected — does not + survive a daemon restart; would need a backstop check anyway. +- *New boolean column on `mediation_sessions`*: rejected — requires + a migration for a derived fact already capturable from the audit + trail. + +--- + +## Decision: Human-assistance opt-in via classifier-output extension + +**Decision**: The classifier prompt for round N+1 (after a +`self_resolution_offered` event) gains an additive `human_requested: +bool` JSON field. The classification deserialiser carries it as a +struct field with `serde(default)` so older provider responses (or +providers that don't yet emit the field) still parse cleanly with +the field defaulting to `false`. 
`policy::evaluate(...)` +short-circuits to `Escalate(EscalationTrigger::PartyRequestedHuman)` +when the field is `true`, regardless of the surrounding +classification label. + +**Rationale**: + +- **Reuses the existing classification round-trip**: no new model + call, no new latency. +- **Multilingual coverage for free**: the classifier already runs in + the party's language; detecting "necesito un humano" / "I want a + human" / "preciso de ajuda humana" works without per-language + keyword lists in Rust. +- **Provider portability (Principle X)**: an additive JSON field is + the lowest-risk change for both the OpenAI-compatible adapter and + the Anthropic adapter. Each adapter adds the parse + the prompt + instruction independently. + +**Alternatives considered**: + +- *Rust-side keyword regex on inbound messages*: rejected — + multilingual keyword lists are brittle (synonyms, slang, formal + vs informal registers) and miss the point that the classifier is + already doing language-aware semantic analysis on every round. +- *A separate model call dedicated to opt-in detection*: rejected — + adds latency and provider cost for a binary classification that + the existing call already covers. + +--- + +## Decision: Solver summary fires immediately, in parallel with party invitations + +**Decision**: When the new branch fires, the existing +`mediation_summary` notification to the assigned solver fires on +the same tick, with `suggested_next_step = +"self_resolution_offered_to_parties"`. There is no delay, no +"wait-and-see-if-parties-resolve" gate. + +**Rationale**: + +- **Audit completeness > solver inbox volume**: a solver who filters + on the new `suggested_next_step` value can mute the cooperative- + case feed if they want to; a solver who needs visibility (because + they're paged for the dispute) gets it without a delay. +- **No state sprawl**: there is no "scheduled-summary" intermediate + state to track. 
+- **Honest System Behavior (Principle XII)**: the solver receives + the same factual summary they'd receive on any other cooperative + case; the only delta is the operational hint that parties have + been invited to self-resolve, which is true at the moment the + summary is sent. + +**Alternatives considered**: + +- *Delay the summary by 1h (bet on self-resolution before paging + the solver)*: rejected — operationally fragile (timer state to + cancel on cooperative closure, tick-based scheduling, edge cases + on daemon restart) for marginal benefit. +- *Skip the summary when invitation fires*: rejected — kills the + solver's visibility into cooperative cases that don't actually + resolve cooperatively. SC-004 explicitly tracks the silence rate + as a feature-health metric; the solver-side summary is the + fallback path that catches stalled cooperative cases. + +--- + +## Decision: Threshold inclusivity (`≥`, not `>`) + +**Decision**: The branch fires when `confidence >= +self_resolution_threshold`. A confidence value exactly equal to +the threshold triggers the invitation. + +**Rationale**: matches the rest of the policy code (which already +uses `>=` on the existing low-confidence escalation gate) and +avoids the surprise of a threshold value that "never fires" because +operators set it to a round number that the model happens to emit +exactly. + +--- + +## Decision: Multilingual support: English / Spanish / Portuguese initial set, extensible by template append + +**Decision**: The initial bundle ships sections for `[en]`, `[es]`, +`[pt]`. New languages are added by appending a `[xx]` section to +the bundle file; no Rust changes required. The keyword-audit unit +test walks every section, so a new section ships only after the +audit passes for it. + +**Rationale**: + +- The existing transcript pipeline already supports independent + per-party language detection across these three; reusing that + detection is essentially free. 
+- Append-only language support keeps the operator workflow simple: + PR adding `[de]` template + matching banned-substring entries in the audit test = one small change set. + +**Alternatives considered**: + +- *Single English template, translate at delivery time via the LLM*: + rejected — re-introduces LLM-authored party text, which violates + Principle II. +- *Ship many languages on day one*: rejected — out-of-scope per the + spec. Adding a language post-launch is documented and trivial. + +--- + +## Open Risks Acknowledged at Plan Time (not blockers) + +- **R-001 — Translation review process**: SC-003 covers literal + banned-keyword leakage but not subtler issues like a phrase that + is grammatically correct but culturally implies a fund-action. + Mitigation deferred: relies on human translator review at the + PR that adds each language section. Documented in the bundle + contract (`contracts/template-bundle.md`). +- **R-002 — Pre-feature baseline metrics for SC-001 / SC-002**: the + spec's success criteria reference a 7-day baseline for + "cooperative cases that resolved without solver intervention" and + "median time from cooperative-detection to dispute closure". The + daemon doesn't currently emit a structured metric for either. + Mitigation: instrument those two counters as part of the + implementation tasks (Phase 2) so the baseline can be captured + from the feature-on-but-kill-switch-off period. SC-007 (kill- + switch off = byte-for-byte legacy) makes the kill-switch-off + baseline trivially capturable. +- **R-003 — Provider drift on `human_requested`**: an out-of-date + provider that does not return the new field still parses cleanly + (`serde(default)` makes it `false`), but the opt-in path will + silently fail to fire for that provider. Mitigation: at startup, + the reasoning health-check round trip can include a + `human_requested: true` example in the system prompt and assert + the parsed response carries the field; if not, log a warning at + `warn!` level.
Captured as a Phase 2 task, not a hard gate. + +--- + +## Best-Practice References Consulted + +- Existing Phase 3 spec (`specs/003-guided-mediation/spec.md`) for + the prompt-bundle pinning pattern and the "model decides whether, + templates author what" split that this feature mirrors. +- Existing `mediation_events` schema (no migration needed; `kind` + is unconstrained TEXT) and the audit-row payload conventions + established by `summary_generated` and `escalation_recommended`. +- The `ReasoningProvider` trait surface in `src/reasoning/mod.rs` + for the additive-JSON-field portability pattern (already used by + several existing classifier-output fields). +- The 2026-04-27 production transcript on dispute + `096fb2e4-7eca-4f59-96e8-ae49f69d1328` as the canonical + motivating case; the seller's "ya recibí el fiat" reply is the + reference shape for "cooperative case, high confidence" in + user-story 1. + +--- + +**Phase 0 status**: Complete. No `NEEDS CLARIFICATION` markers +remain. Ready to advance to Phase 1. diff --git a/specs/005-cooperative-self-resolution/spec.md b/specs/005-cooperative-self-resolution/spec.md new file mode 100644 index 0000000..edb19b0 --- /dev/null +++ b/specs/005-cooperative-self-resolution/spec.md @@ -0,0 +1,325 @@ +# Feature Specification: Cooperative Self-Resolution Nudge + +**Feature Branch**: `005-cooperative-self-resolution` +**Created**: 2026-04-27 +**Status**: Draft +**Input**: User description: "When Serbero classifies a mediation case as +coordination_failure_resolvable with high confidence, send a subtle, +language-matched invitation to each party in the existing chat transport, +inviting them to coordinate the resolution among themselves and offering an +explicit opt-in to escalate to the assigned human solver. The invitation +MUST NOT name, instruct, suggest, or imply any specific fund-moving action. +The Phase 3 policy boundary prohibiting fund-action language stays +unchanged. 
Motivated by the 2026-04-27 production transcript where the +seller confirmed receipt of fiat in Spanish but heard nothing back from +Serbero, leaving them in silence while a human solver had to intervene +manually for a case that was already trending toward cooperative +resolution." + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 — Cooperative case resolves without human solver action (Priority: P1) + +A buyer files a dispute. Serbero opens a mediation session and asks both +parties what happened. The seller, in their preferred language, confirms +they have received the fiat payment. Serbero recognises this as a +cooperative case with high confidence, sends both parties a brief, +language-matched message acknowledging their input and noting that this +kind of case typically resolves through coordination between the parties +themselves, while explicitly offering them the option to ask for human +assistance if they'd prefer. The parties coordinate among themselves; +the human solver receives the same structured context they'd otherwise +receive (so they have full visibility) but does not need to actively +mediate. The dispute is closed cooperatively without solver intervention. + +**Why this priority**: This is the canonical happy path the feature +exists for. It directly addresses the two operational gaps observed in +production: parties are no longer left in silence after a meaningful +reply, and human solvers stop being pulled into the loop for cases that +were already trending toward cooperative resolution. Without this story, +the feature has no value. + +**Independent Test**: Can be fully tested in a controlled session where +the model returns the cooperative classification with high confidence; +verify both parties receive the templated invitation in their respective +detected languages and the assigned human solver still receives the +existing structured summary. + +**Acceptance Scenarios**: + +1. 
**Given** an active mediation session in which both parties have + replied at least once and the most recent classifier output is + "cooperative case, high confidence", **When** Serbero advances the + session, **Then** each party receives one message in their detected + language inviting them to coordinate the resolution themselves, with + a single sentence offering human assistance as an opt-in. +2. **Given** the same session, **When** the invitations are sent, + **Then** the assigned human solver receives the same structured + summary they'd otherwise receive on a cooperative classification, + marked so the solver can tell at a glance that parties have been + invited to self-resolve. +3. **Given** the parties subsequently coordinate the resolution + themselves and the underlying dispute is closed cooperatively, + **When** the closure is observed, **Then** Serbero records the + session as closed without ever having pulled the human solver into + active mediation. + +--- + +### User Story 2 — Party opts in to human assistance (Priority: P1) + +After receiving the self-resolution invitation, one party (buyer or +seller) replies in their party chat asking for human help — for example +"I'd like a human to look at this please" or "necesito un humano que +revise esto". Serbero detects the explicit human-assistance request and +escalates the session to the assigned solver immediately, regardless of +what the latest classification would otherwise suggest. The solver +receives the same handoff package they would receive on any other +escalation path. + +**Why this priority**: The opt-in is the entire reason the cooperative +nudge is acceptable. Without a working opt-in, Serbero would be nudging +parties toward self-resolution with no fall-back, which is unacceptable +for fund-related disputes. This story is what makes the policy boundary +defensible. 
+ +**Independent Test**: Continue a session that has already received the +self-resolution invitation; ingest a fresh inbound reply that contains +an explicit human-assistance request; verify the session escalates to +the assigned solver with a recognisable trigger. + +**Acceptance Scenarios**: + +1. **Given** a session that has already received the self-resolution + invitation, **When** an inbound reply explicitly requests human + assistance, **Then** the session is escalated to the assigned human + solver with a clear "party requested human" indicator. +2. **Given** the same session, **When** an inbound reply is unrelated + ("ok, esperamos"), **Then** the session is NOT escalated — Serbero + continues normal handling. + +--- + +### User Story 3 — Cooperative invitation does not lock the session into a cooperative branch (Priority: P2) + +After Serbero invites the parties to self-resolve, the parties go quiet. +Some time later one party replies with new evidence that contradicts the +earlier cooperative tone (e.g., "the seller never released, they lied"). +The classifier on this new round returns a non-cooperative label +("conflicting claims" or "suspected fraud"). Serbero must NOT keep +treating the case as cooperative; it must escalate immediately on the +new round, just as it would have without the prior self-resolution +invitation. + +**Why this priority**: Confirms the nudge is a one-shot, evidence-driven +optimisation, not a state lock. Sessions that turn sour after the +invitation must follow the existing escalation rules. Without this +story, the feature could trap genuinely contentious cases in +"cooperative limbo". + +**Independent Test**: Continue a session from User Story 1; ingest a +fresh round of replies whose classification shifts to a non-cooperative +label; verify the policy emits the standard escalation for that label. + +**Acceptance Scenarios**: + +1. 
**Given** a session that received the self-resolution invitation in + round N, **When** round N+1 produces a classification that would + normally trigger escalation, **Then** the session is escalated under + the standard escalation trigger for that classification — not held + back by the prior cooperative branch. + +--- + +### Edge Cases + +- **Repeated cooperative classifications across rounds**: the + invitation MUST be sent at most once per session. A second cooperative + classification on a later round must not re-send the invitation; it + falls through to the existing solver-summary path. +- **Mixed-language reply on the round where the invitation fires**: + language detection is applied independently to buyer and seller; if + one side's language confidence is very low, the invitation defaults + to English for that side only. +- **Both parties opt in to human assistance simultaneously**: only one + escalation event fires (idempotent escalation). +- **Confidence sits exactly at the threshold**: the threshold is + inclusive (`≥`); a confidence of exactly the configured value + triggers the invitation. +- **Sub-threshold confidence on what would otherwise be a cooperative + case**: the existing solver-summary path runs unchanged; no party- + facing invitation is sent. +- **Operator wants to disable the feature globally** (e.g., during an + incident or audit window): a single configuration switch turns the + whole cooperative invitation behaviour off without redeploying. +- **Dispute is resolved on the underlying platform between the + invitation send and the parties' next reply**: the existing + dispute-resolved path closes the session normally; no further + Serbero action against the dispute. 
+ +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: When the latest classification of an active mediation + session is "cooperative case" with confidence at or above a + configured threshold, the system MUST invite both parties (buyer and + seller) to coordinate the resolution themselves, in the existing + party chat transport. +- **FR-002**: Each party-facing invitation MUST be written in that + party's detected language. Buyer and seller languages are detected + independently; one party's invitation MUST NOT switch into the + other's language. +- **FR-003**: The invitation text MUST come from a static, repository- + hosted set of templates. The model is allowed to decide whether to + send the invitation; the model MUST NOT author the party-facing text. +- **FR-004**: The invitation text MUST NOT name, instruct, suggest, or + imply any specific fund-moving action (release, settle, cancel, + disburse, transfer, or any equivalent in any supported language). + This MUST be verified by an automated check against the template set + on every change. +- **FR-005**: Every invitation MUST end with one short sentence + offering human assistance as an opt-in (e.g., "if you'd prefer human + assistance, let me know and I'll route you to the assigned solver"), + in the party's language. +- **FR-006**: The invitation MUST fire at most once per mediation + session — repeat cooperative classifications on later rounds MUST NOT + re-send it. +- **FR-007**: When the invitation fires, the system MUST also deliver + the existing structured summary to the assigned human solver, marked + in a way that lets the solver tell at a glance that parties have been + invited to self-resolve. This guarantees the solver still has full + context if the parties do not resolve cooperatively. 
+- **FR-008**: When a party reply explicitly requests human assistance + (in any supported language), the system MUST escalate the session to + the assigned human solver immediately on the next round, regardless + of what the round's classification would otherwise suggest. +- **FR-009**: The system MUST record an auditable event the moment the + invitation is dispatched, capturing at minimum: which session, which + classification confidence, and which prompt-bundle version was active + — enough for an operator reviewing logs after the fact to reconstruct + why the invitation fired without the audit trail itself revealing + party content beyond what existing logs already reveal. +- **FR-010**: A configurable confidence threshold MUST gate FR-001; + the default value is 0.75 and operators MUST be able to raise it + (e.g., to 0.90) without code changes. +- **FR-011**: A configuration kill-switch MUST allow operators to + disable the entire cooperative-invitation behaviour without removing + the templates or modifying code. With the kill-switch off, the + pipeline behaves exactly as it did before this feature shipped. +- **FR-012**: When confidence falls below the configured threshold for + a cooperative classification, the existing behaviour (solver-only + summary, no party invitation) MUST remain unchanged. +- **FR-013**: The session lifecycle MUST end at the same state used by + the existing cooperative summary path (the state that already blocks + the eligibility predicate from re-opening a duplicate session). The + cooperative invitation does NOT change the session lifecycle + contract. +- **FR-014**: New supported languages MUST be addable by appending a + template section without code changes; the automated fund-action + keyword check (FR-004) MUST extend to the new section before it is + shipped. 
+ +### Key Entities + +- **Self-Resolution Invitation**: A static, language-keyed message pair + (cooperative-coordination text + human-assistance opt-in sentence) + delivered to a single party. Identified by language and audience + (buyer / seller). MUST satisfy FR-004 (no fund-action keywords) for + every supported language. +- **Self-Resolution Audit Record**: A record on the session timeline + noting the dispatch of the invitation, including session id, + classification confidence at the moment of dispatch, and the + prompt-bundle version active at that moment. Does NOT carry the + rationale text itself. +- **Human-Assistance Request Marker**: A boolean signal derived from + the classifier on a round following an invitation, indicating that + the most recent party reply explicitly asks for a human solver. + Triggers FR-008. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: For mediation sessions where the cooperative invitation + fires, the share that resolve without active human solver + intervention increases by at least 30% compared to the baseline + measured during the seven days before this feature ships. +- **SC-002**: Median time from "Serbero detects cooperative case" to + "dispute closed on the underlying platform" decreases by at least + 20% for sessions where the invitation fires, versus the pre-feature + baseline. (No solver-side wait on cooperative cases.) +- **SC-003**: Zero invitations leak any banned fund-action keyword + into the user-facing text across all supported languages, verified + by an automated keyword check that runs on every change to the + template set. +- **SC-004**: For at least 95% of sessions where the invitation fires, + both parties either resolve the dispute or use the human-assistance + opt-in within seven days. Sessions where both parties simply go + silent stay below 5% of invitations sent; if this rate is exceeded + for two consecutive review windows, the invitation phrasing is + revisited. 
+- **SC-005**: When a party explicitly requests human assistance after + an invitation, the session reaches the assigned human solver within + one engine cycle (the same latency as any other escalation path in + the system today). +- **SC-006**: The cooperative invitation is sent at most once per + session in 100% of audited cases (one-shot guarantee under FR-006). +- **SC-007**: With the kill-switch off (feature disabled), the system's externally + observable behaviour is byte-for-byte identical to the system's + behaviour the day before this feature shipped (zero regression on + the legacy cooperative-summary path). + +## Assumptions + +- The existing party-language detection in the transcript pipeline is + reliable enough to route invitations to the correct language with + acceptable error. English, Spanish, and Portuguese are the initial + supported set; new languages are added by template append per + FR-014. +- "Cooperative case" classification with high confidence is a + meaningful share of overall mediation traffic; if it is rare, the + feature still works correctly but its absolute operational value + (SC-001, SC-002) is smaller. +- The assigned human solver is reachable via the same notification + channel used by the existing solver-summary path; this feature does + not introduce a new delivery surface for solvers. +- The platform's existing dispute-resolved path correctly closes + sessions when the underlying dispute is resolved cooperatively, + including sessions that received the invitation. This was the + subject of a recently-shipped fix in `main` and is treated here as + a prerequisite, not part of this feature's scope. +- Existing audit-event sinks, notification routing, and + prompt-bundle hashing infrastructure cover the new variants this + feature introduces; no new operational surface is required for + solvers or operators.
+ +## Dependencies + +- **Hard dependency**: the recently-shipped fix in `main` that stops + the system from auto-closing mediation sessions at the legacy + terminal state, so the eligibility predicate cannot reopen a + duplicate session while parties are still coordinating. Without + this fix, the cooperative invitation would still trigger duplicate + sessions on the next engine tick. +- **Soft dependency**: the existing Phase 3 policy boundary in the + system prompt continues to forbid Serbero from authoring fund-action + language in any free-form output. This feature relies on that + boundary; it does not weaken it. + +## Out of Scope + +- Modifying the policy for any classification other than the + cooperative case (conflicting claims, suspected fraud, unclear, and + not-suitable-for-mediation continue to follow their existing + escalation / clarification flows). +- Allowing Serbero to suggest, instruct, or imply fund-moving actions + by name. The existing prohibition is unchanged and re-stated. +- Auto-closing disputes on the underlying platform. Serbero does not + invoke admin-settle, admin-cancel, or any other platform admin + command, in this feature or anywhere else. +- Localisation beyond the languages supported by the existing + transcript pipeline at the time this feature ships (English, + Spanish, Portuguese initially). New languages are added by + appending template sections per FR-014; translator tooling and + workflow are out of scope for this feature. 
From ebb94756bb6367705aed9ae4facbc233ff1859d0 Mon Sep 17 00:00:00 2001 From: grunch Date: Mon, 27 Apr 2026 21:17:53 -0300 Subject: [PATCH 2/9] feat(mediation): cooperative self-resolution invitation branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the classifier marks a session as `coordination_failure_resolvable` with confidence ≥ the configured threshold (default 0.75), Serbero now sends a templated, language-matched invitation to both parties inviting them to coordinate the next step among themselves, with an explicit human-assistance opt-in. Static repo templates in `prompts/phase3-self-resolution.md` (en/es/pt initially) — the LLM only decides whether the branch fires, never what to say. The fund-action prohibition is enforced by a keyword-audit unit test that refuses to merge a bundle containing forbidden verbs in any language. Key surface changes: - `MediationEventKind::SelfResolutionOffered` — new audit kind, written before outbound dispatch with payload `{confidence, languages}` and a rationale-id reference (FR-120 / no inlined text). - `EscalationTrigger::PartyRequestedHuman` — fires when the model detects an explicit human-assistance request on a round following a `self_resolution_offered` event. - `PolicyDecision::SuggestSelfResolutionWithSummary { confidence }` — new dispatch arm; falls through to the legacy `Summarize` when any pre-condition fails (kill-switch off, sub-threshold, prior offered). - State-machine edge `SummaryDelivered → EscalationRecommended` so the opt-in can lift the session out of the post-summary state. - Two `[mediation]` config keys: `self_resolution_threshold` (f32, default 0.75) and `self_resolution_enabled` (bool, default true, master kill-switch). - `ClassificationResponse` extended with three additive serde-defaulted fields: `human_requested`, `buyer_language`, `seller_language`. 
The language fields ride through the structured response so dispatch doesn't need a separate Rust-side detector. - Operational tracing: `cooperative_case_detected` (on dispatch) and `cooperative_case_closed_externally` (on `dispute_resolved` for sessions that received the invitation) for SC-001 / SC-002 baseline. Backwards compatibility: - No SQL migration. The new audit kind is unconstrained TEXT; old sessions are unaffected. - Both reasoning adapters (OpenAI-compatible + Anthropic) reuse the shared `build_classification_prompt` / `parse_classification` from the OpenAI module, so the prompt + parser changes land in both. - Daemon upgrades that haven't shipped `phase3-self-resolution.md` yet log a warning at startup and leave the branch inert (placeholder rendered string) instead of refusing to start. Test coverage: - 8 new unit tests in `policy::evaluate` covering the cooperative branch (fires / threshold-inclusive / sub-threshold / kill-switch / one-shot / human-requested short-circuit / defence-in-depth / no-lock-in). - New `phase3_self_resolution_template_audit` test asserts the bundle parses, every language is covered by the banned-substring matrix, and rendered strings carry no fund-action keywords + always include the opt-in sentence. - Unit tests for `SelfResolutionTemplates::render_for` (5 cases) and the bundle parser (6 cases). - All 287 existing lib tests + integration suite pass with zero regressions.
--- config.sample.toml | 26 ++ prompts/phase3-classification.md | 30 ++ prompts/phase3-escalation-policy.md | 9 + prompts/phase3-self-resolution.md | 47 +++ prompts/phase3-system.md | 13 +- .../005-cooperative-self-resolution/tasks.md | 238 ++++++++++++++ src/config.rs | 17 + src/db/mediation_events.rs | 90 +++++ src/handlers/dispute_resolved.rs | 20 ++ src/mediation/escalation.rs | 5 + src/mediation/follow_up.rs | 308 +++++++++++++++++- src/mediation/mod.rs | 2 + src/mediation/policy.rs | 294 ++++++++++++++++- src/mediation/self_resolution.rs | 194 +++++++++++ src/mediation/session.rs | 53 +++ src/models/config.rs | 23 ++ src/models/mediation.rs | 26 ++ src/models/reasoning.rs | 33 +- src/prompts/hash.rs | 33 ++ src/prompts/mod.rs | 62 +++- src/prompts/self_resolution_parser.rs | 242 ++++++++++++++ src/reasoning/openai.rs | 49 ++- tests/common/mod.rs | 3 + tests/phase3_authority_boundary.rs | 5 + tests/phase3_escalation_triggers.rs | 35 +- tests/phase3_followup_reasoning_failure.rs | 1 + tests/phase3_followup_round.rs | 2 + tests/phase3_followup_summary.rs | 13 + .../phase3_self_resolution_template_audit.rs | 192 +++++++++++ tests/phase3_take_reasoning_coupling.rs | 6 + tests/reasoning_anthropic.rs | 2 + tests/reasoning_ppqai.rs | 2 + 32 files changed, 2054 insertions(+), 21 deletions(-) create mode 100644 prompts/phase3-self-resolution.md create mode 100644 specs/005-cooperative-self-resolution/tasks.md create mode 100644 src/mediation/self_resolution.rs create mode 100644 src/prompts/self_resolution_parser.rs create mode 100644 tests/phase3_self_resolution_template_audit.rs diff --git a/config.sample.toml b/config.sample.toml index eb22252..ffdee08 100644 --- a/config.sample.toml +++ b/config.sample.toml @@ -68,6 +68,32 @@ solver_auth_retry_max_interval_seconds = 3600 solver_auth_retry_max_total_seconds = 86400 solver_auth_retry_max_attempts = 24 +# --- Cooperative self-resolution branch (Feature 005) --- +# +# When the classifier marks a session as +# 
`coordination_failure_resolvable` with high confidence, Serbero can +# send a templated, language-matched invitation to both parties +# inviting them to coordinate the next step among themselves, with an +# explicit human-assistance opt-in. The party-facing text is a +# static repo string (`prompts/phase3-self-resolution.md`) — the LLM +# only decides WHETHER the branch fires, never WHAT to say. Templates +# never name a fund-moving action; the keyword-audit unit test +# refuses to merge a bundle that violates this. + +# Confidence floor at which the cooperative branch fires. Range +# 0.0..=1.0; values outside that range fail loudly at startup. Set +# higher (e.g. 0.90) to fire only on very-high-confidence cooperative +# cases. Setting this to 1.0 effectively disables the branch — use +# the kill-switch below instead. +self_resolution_threshold = 0.75 + +# Master kill-switch. When false, the branch is bypassed entirely +# and Serbero behaves byte-for-byte as before this feature shipped +# (the legacy cooperative-summary path runs unchanged). Use during +# incident windows or audit reviews when you want to force every +# cooperative case through human review. +self_resolution_enabled = true + # --------------------------------------------------------------------------- # [reasoning] — the AI provider that classifies disputes and drafts # clarifying messages during Phase 3 guided mediation. diff --git a/prompts/phase3-classification.md b/prompts/phase3-classification.md index de3186a..e798027 100644 --- a/prompts/phase3-classification.md +++ b/prompts/phase3-classification.md @@ -89,3 +89,33 @@ Hard rules: clarification. - Each question stands on its own — don't cross-reference the other party's text, since each party only ever sees theirs. + +## Per-Party Language (Feature 005) + +The classifier MUST also emit two top-level fields: + +- `buyer_language` (string | null): ISO-639-1 code (e.g. `"en"`, + `"es"`, `"pt"`) inferred from the buyer's most recent reply. 
Set + to `null` when the latest message has no buyer content or is too + short to disambiguate. Required on **every** round, not only on + rounds following a `self_resolution_offered` event — the runtime + uses the codes to drive the cooperative-self-resolution dispatch + arm and keeps round-0/1 ready in case the cooperative branch + fires later. +- `seller_language` (string | null): same shape, for the seller. + +## Human-Assistance Opt-In (Feature 005, conditional) + +On rounds following a `self_resolution_offered` audit event for the +session — the runtime appends the request to the prompt only on +those rounds — the classifier MUST also emit: + +- `human_requested` (boolean): `true` if and only if the latest + party reply contains an explicit, unambiguous request for a human + solver / mediator / arbitrator. Examples: `"I want a human"`, + `"necesito un humano"`, `"please escalate to a person"`, `"que un + humano lo revise"`, `"preciso de um humano"`. Vague phrasings like + `"this is taking too long"` or `"I'm frustrated"` do **NOT** count. + When in doubt, set to `false` — a false negative defers escalation + by one round (the user re-states); a false positive escalates to + human prematurely on a case the parties might have resolved. diff --git a/prompts/phase3-escalation-policy.md b/prompts/phase3-escalation-policy.md index 4ba9386..b8b6113 100644 --- a/prompts/phase3-escalation-policy.md +++ b/prompts/phase3-escalation-policy.md @@ -42,6 +42,15 @@ serialized form is canonical; the enum name is the cross-reference. Evidence: rationale id. - **`notification_failed`** (`NotificationFailed`): Summary/escalation notification undeliverable. Evidence: notification error. +- **`party_requested_human`** (`PartyRequestedHuman`): On a round + following a `self_resolution_offered` event, a party reply + contained an explicit, unambiguous request for human assistance + (Feature 005, FR-008). 
The classifier detects the request via the + `human_requested: bool` field and `policy::evaluate` short-circuits + to this trigger before the regular classification-label dispatch. + Evidence: rationale id of the round that detected the request; + reference to the prior `self_resolution_offered` audit row that + scoped the short-circuit. ## Handoff Package diff --git a/prompts/phase3-self-resolution.md b/prompts/phase3-self-resolution.md new file mode 100644 index 0000000..e8c15db --- /dev/null +++ b/prompts/phase3-self-resolution.md @@ -0,0 +1,47 @@ + + +fallback_language = "en" + +[en] +template = "Thanks for the update — it sounds like the two of you may be close to coordinating the next step between yourselves. I'll keep monitoring this conversation in case anything changes." +human_assistance_optin = "If you'd prefer human assistance instead, just let me know in this chat and I'll route you to the assigned solver." + +[es] +template = "Gracias por la actualización: parece que ustedes dos podrían estar cerca de coordinar el siguiente paso entre sí. Sigo atento a esta conversación por si algo cambia." +human_assistance_optin = "Si prefieres asistencia humana, dímelo en este chat y te conecto con la persona asignada al caso." + +[pt] +template = "Obrigado pela atualização — parece que vocês dois podem estar perto de coordenar o próximo passo entre si. Continuo acompanhando esta conversa caso algo mude." +human_assistance_optin = "Se preferir assistência humana, me avise neste chat e eu encaminho você para a pessoa designada." diff --git a/prompts/phase3-system.md b/prompts/phase3-system.md index fbff173..ff0e356 100644 --- a/prompts/phase3-system.md +++ b/prompts/phase3-system.md @@ -41,9 +41,18 @@ limits, and honesty discipline. These rules apply to every reasoning call. - Allowed: classification labels with confidence scores, clarifying questions sourced from message templates, structured summaries for - the solver, explicit escalation recommendations. 
+ the solver, explicit escalation recommendations, the + `self_resolution_offered` cooperative-invitation event (templated + per-party message in the party's detected language with an explicit + human-escalation opt-in; the templates are static repo strings that + MUST NOT name a fund-moving action, see + `prompts/phase3-self-resolution.md`). - Disallowed: autonomous dispute closure, binding decisions, - fund-related instructions, fabricated factual claims. + fund-related instructions, fabricated factual claims. The + fund-action prohibition extends to every party-facing surface, + including the cooperative-self-resolution invitation — that file's + contents are an extension of the Phase 3 authority boundary, not an + exception to it. ## Language Matching diff --git a/specs/005-cooperative-self-resolution/tasks.md b/specs/005-cooperative-self-resolution/tasks.md new file mode 100644 index 0000000..001da49 --- /dev/null +++ b/specs/005-cooperative-self-resolution/tasks.md @@ -0,0 +1,238 @@ +--- + +description: "Task list for Cooperative Self-Resolution Nudge (Feature 005)" +--- + +# Tasks: Cooperative Self-Resolution Nudge + +**Input**: Design documents from `/specs/005-cooperative-self-resolution/` +**Prerequisites**: plan.md (✅), spec.md (✅), research.md (✅), data-model.md (✅), contracts/ (✅), quickstart.md (✅) + +**Tests**: This codebase has a strong integration-testing tradition (`tests/phase3_*.rs`); the plan lists six new test binaries explicitly, so test tasks are part of each user-story phase. They are NOT TDD-first gates — Rust's `cargo test` runs them after implementation lands. + +**Organization**: Tasks grouped by user story (US1, US2, US3) per spec.md priorities. US1 + US2 are P1 and constitute the shippable MVP; US3 is P2 (regression-only, since its implementation is fully covered by US1+US2 work). + +## Format: `[ID] [P?] 
[Story] Description` + +- **[P]**: Can run in parallel (different files, no dependencies on incomplete tasks) +- **[Story]**: User story label (US1 / US2 / US3) — only on user-story phase tasks +- All paths are relative to the repository root `/home/negrunch/dev/cancerbero/` + +--- + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: Module wiring + loader-discovery before any code lands. + +- [ ] T001 Inspect `src/prompts/` (or wherever `PromptBundle` is loaded today — likely `src/prompts/mod.rs` and `src/prompts/bundle.rs`) and confirm the loader can be extended to parse one extra Markdown file alongside the existing bundle files (`phase3-system.md`, `phase3-classification.md`, etc.). Document the exact location of the loader call site for T010. +- [ ] T002 [P] Add `pub mod self_resolution;` to `src/mediation/mod.rs` (after the `pub mod ...;` lines for the existing siblings). Empty module file is added in T009; this task only registers the future module so the rest of the codebase compiles after T009 lands without further wiring. + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Data-model and prompt-bundle scaffolding that ALL three user stories depend on. + +**⚠️ CRITICAL**: No user-story work can begin until this phase is complete. + +### Enum + struct extensions (parallelizable — all touch different files) + +- [ ] T003 [P] Add `SelfResolutionOffered` variant to `MediationEventKind` in `src/db/mediation_events.rs`. The `Display` / `FromStr` impls already follow `snake_case`; serialised string is `"self_resolution_offered"`. Add the variant to any exhaustive match in the same file, following the existing convention. +- [ ] T004 [P] Add `PartyRequestedHuman` variant to `EscalationTrigger` in `src/models/escalation.rs`. Serialised string `"party_requested_human"`. Update the `Display` / `FromStr` pair and any exhaustive match in the same file.
+- [ ] T005 [P] Add `SuggestSelfResolutionWithSummary { confidence: f32 }` variant to `PolicyDecision` in `src/mediation/policy.rs`. The variant is a sibling of the existing `Summarize { classification, confidence }`. Update any exhaustive `match` over `PolicyDecision` in the same file (the dispatch happens in `follow_up.rs` and is wired in T018, but the compile errors here surface every site that needs updating). +- [ ] T006 [P] Extend `ClassificationResponse` in `src/models/reasoning.rs` with three additive fields, all `#[serde(default)]`: + - `human_requested: bool` (defaults to `false` so out-of-date providers and round 0/1 responses parse cleanly). + - `buyer_language: Option<String>` (ISO-639-1 code, e.g. `"en"`, `"es"`, `"pt"`; `None` when the classifier cannot determine the language confidently). + - `seller_language: Option<String>` (same shape as `buyer_language`). + + The two language fields are needed because there is **no Rust-side language-detection helper in this codebase** — language inference today happens implicitly inside the classifier LLM call when it emits `buyer_clarification` / `seller_clarification` in the inferred language. The new branch needs structured language codes so the dispatch arm in T018 can pick the right `[xx]` section from the static template bundle. `None` falls back to `bundle.self_resolution.fallback_language` (typically `"en"`) per the contract in `template-bundle.md`. +- [ ] T007 [P] Add `self_resolution_threshold: f32` (default 0.75) and `self_resolution_enabled: bool` (default true) to `MediationConfig` in `src/models/config.rs`. Use `#[serde(default = "...")]` helpers per the existing convention. Extend the existing validation function for `MediationConfig` to assert `0.0 ..= 1.0` for the threshold (matches the existing pattern for similar f32 fields). Refusing to start on out-of-range values is the documented behaviour per `contracts/config.md`.
+- [ ] T008 [P] Add the legal transition `(SummaryDelivered, EscalationRecommended)` to `MediationSessionState::can_transition_to` in `src/models/mediation.rs`. Update the existing `can_transition_to_*` test cases (around line 260) so the new edge is covered (legal) and the inverse `(EscalationRecommended, SummaryDelivered)` is asserted illegal. + +### Self-resolution module + prompt bundle wiring + +- [ ] T009 Create `src/mediation/self_resolution.rs` with: (a) `SelfResolutionTemplates { by_language: HashMap<String, SelfResolutionLanguageEntry>, fallback_language: String }`, (b) `SelfResolutionLanguageEntry { template: String, human_assistance_optin: String }`, (c) a pure function `render_for(language_code: Option<&str>, templates: &SelfResolutionTemplates) -> String` that returns `format!("{} {}", template, human_assistance_optin)` for the matched language (or `fallback_language` if `language_code` is `None` or absent from `by_language`). All pure functions, no I/O — keeps the keyword-audit unit test fast. +- [ ] T010 Extend `PromptBundle` in the file identified by T001 with `pub self_resolution: SelfResolutionTemplates` and update the loader to parse `prompts/phase3-self-resolution.md`. The bundle's existing SHA-256 hash automatically extends over the new file because the loader hashes the whole bundle directory. Add the parser implementation in `src/prompts/` per the loader's existing pattern (look at how `phase3-message-templates.md` is parsed for reference). The parser MUST tolerate empty / commented-only sections gracefully and MUST require at minimum one entry whose key matches the file's `fallback_language` setting. +- [ ] T011 Create `prompts/phase3-self-resolution.md` with the operator-note Markdown comment from `contracts/template-bundle.md` followed by `[en]`, `[es]`, and `[pt]` sections. Each section MUST contain `template = "..."` and `human_assistance_optin = "..."` per the contract.
Use the seed text in `contracts/template-bundle.md` for the initial values (or refer to the original draft in `/tmp/serbero-005-draft/spec-draft/spec.md` Appendix A). The default `fallback_language` is `"en"`. +- [ ] T012 [P] Amend `prompts/phase3-system.md`'s "Output Rules" section: add `self_resolution_offered` to the Allowed list with the constraints "templated per-party invitation, in the party's detected language, with an explicit human-escalation opt-in; MUST NOT name a fund-moving action". Re-state the unchanged fund-action prohibition adjacent to the new entry. +- [ ] T013 [P] Amend `prompts/phase3-escalation-policy.md` to document `party_requested_human` as a new escalation trigger. Use one paragraph similar in tone to the existing trigger documentation (`reasoning_unavailable`, `low_confidence`, etc.). +- [ ] T014 [P] Amend `prompts/phase3-classification.md` (or whichever prompt file holds the classifier JSON-output schema) to document three additive fields: + - `human_requested: bool` (optional; only requested by the runtime on rounds following a `self_resolution_offered` event for the session — covered by T021/T022 prompt assembly). + - `buyer_language: string | null` (ISO-639-1 code, e.g. `"en"`, `"es"`, `"pt"`; emitted on **every** round, not gated on prior events). Instruct the model to set the field to its best-guess code from the buyer's most recent reply, or `null` when the most recent reply has no buyer content or is too short to disambiguate. + - `seller_language: string | null` (same shape and instructions, for the seller). + + Add an example block showing the full classifier-output JSON with the three new fields populated. The two language fields are unconditionally part of the schema so the runtime always has structured language codes to drive `render_for(...)` (see T018) without needing a Rust-side language-detection helper. + +**Checkpoint**: Foundation ready. 
The codebase still compiles, `cargo test` still passes (the new variants and fields are unused so far, but every existing exhaustive match has been updated). Ready to begin user-story implementation. + +--- + +## Phase 3: User Story 1 (Priority: P1) 🎯 MVP — Cooperative case resolves without human solver action + +**Goal**: When the model classifies a session as `coordination_failure_resolvable` with confidence ≥ the configured threshold, dispatch one templated, language-matched invitation per party in the existing chat transport AND deliver the existing solver-facing summary with `suggested_next_step = "self_resolution_offered_to_parties"`. + +**Independent Test**: Per spec User Story 1 + the quickstart's "User Story 1 — Cooperative resolves without solver action" walkthrough. A controlled session with a scripted reasoning provider returning the cooperative label at confidence 0.85 must produce: two outbound `mediation_messages` rows (one per party, audience-tagged), one `self_resolution_offered` audit row, one `summary_generated` audit row, one `mediation_summary` notification to the assigned solver, and the session in `summary_delivered`. + +### Tests for User Story 1 + +- [ ] T015 [P] [US1] Create `tests/phase3_self_resolution_template_audit.rs` — pure-Rust unit test (no DB, no relay, no `MockReasoningProvider`). Loads the prompt bundle from the `tests/fixtures/prompts/` directory (the fixture bundle the rest of the test suite uses), walks every `(language_code, SelfResolutionLanguageEntry)` cell, builds the rendered string `format!("{} {}", template, human_assistance_optin)`, and asserts the rendered string contains NONE of the banned substrings from the matrix in `contracts/template-bundle.md` (case-insensitive, ASCII-folded). The matrix is a `&[(&str, &[&str])]` constant in the test file. Backs FR-004 / SC-003. + +### Implementation for User Story 1 + +- [ ] T016 [US1] Add the new branch to `policy::evaluate(...)` in `src/mediation/policy.rs`. 
Pre-condition predicate (top of the cooperative branch): `classification.label == ClassificationLabel::CoordinationFailureResolvable && classification.suggested_action == SuggestedAction::Summarize && confidence >= cfg.mediation.self_resolution_threshold && cfg.mediation.self_resolution_enabled && !session_has_self_resolution_offered(conn, session_id)`. When all true, return `PolicyDecision::SuggestSelfResolutionWithSummary { confidence }`. When any false, fall through to the existing `Summarize { classification, confidence }` branch (legacy behaviour preserved per FR-012 / SC-007). +- [ ] T017 [US1] Add the helper `fn session_has_self_resolution_offered(conn: &Connection, session_id: &str) -> Result<bool>` in `src/mediation/policy.rs` (or `src/db/mediation_events.rs` if it fits better there — wherever the existing `count_classification_events` lives is a good neighbour). Implementation: `SELECT 1 FROM mediation_events WHERE session_id = ?1 AND kind = 'self_resolution_offered' LIMIT 1`, return `Ok(row.is_some())`. Used by T016 and T023. +- [ ] T018 [US1] Add the dispatch arm for `PolicyDecision::SuggestSelfResolutionWithSummary` in `src/mediation/follow_up.rs`. The arm must, in order: (1) read each party's language from the structured classifier response — `classification.buyer_language.as_deref()` and `classification.seller_language.as_deref()` (added in T006). The runtime does NOT do its own language detection; the LLM is the single source of truth for language inference (consistent with how `buyer_clarification` / `seller_clarification` already inherit language implicitly today). (2) Call `mediation::self_resolution::render_for(language_code, &bundle.self_resolution)` per party — the helper itself falls back to `bundle.self_resolution.fallback_language` (typically `"en"`) when the passed code is `None` or absent from `by_language`, per the contract in `template-bundle.md`.
(3) Open a SQL transaction and within it: write the `SelfResolutionOffered` audit event row (payload per `data-model.md` and `contracts/audit-events.md`, including the resolved per-party language codes) AND insert two `mediation_messages` rows (one per party, audience-tagged "buyer" / "seller", populated by `chat::outbound::build_wrap_with_audience`). (4) Commit. (5) Publish each gift-wrap outside the transaction (matching the commit-then-publish pattern of the existing initial drafter). (6) Call the existing `deliver_summary(...)` helper with `suggested_next_step = "self_resolution_offered_to_parties"`. The arm walks the session through `Classified → SummaryPending → SummaryDelivered` exactly as the legacy Summarize arm does, just with the extra outbound + audit row before `deliver_summary`. +- [ ] T019 [US1] Create `tests/phase3_self_resolution_happy_path.rs` integration test (US1 acceptance scenarios 1–3). Seed a session in `awaiting_response` with one buyer reply (Spanish) + one seller reply (Spanish: "ya recibí el fiat"). Wire a `MockReasoningProvider` that returns `CoordinationFailureResolvable` confidence 0.85 with `suggested_action = Summarize`. Run one engine cycle. Assert: (a) two outbound `mediation_messages` rows with `party = 'buyer'` / `'seller'` and content equal to the byte-exact `[es]` rendered template; (b) exactly one `self_resolution_offered` audit row with payload referencing the rationale id; (c) exactly one `summary_generated` audit row with `suggested_next_step = "self_resolution_offered_to_parties"`; (d) one `mediation_summary` notification published to the assigned solver; (e) session in `summary_delivered`. +- [ ] T020 [US1] Create `tests/phase3_self_resolution_one_shot.rs` integration test (FR-006 / SC-006). Continue from the happy-path scenario; ingest a fresh round of replies that produce another `CoordinationFailureResolvable` confidence 0.85. 
Assert: (a) the second round does NOT produce a second `self_resolution_offered` audit row, (b) the second round does NOT produce a second outbound pair of party invitations, (c) the second round still produces a normal `summary_generated` row (legacy Summarize falls through, since the one-shot guard fails the new branch's pre-condition). + +**Checkpoint**: User Story 1 fully functional. The cooperative invitation path ships end-to-end. With `self_resolution_enabled = false`, the legacy path is byte-for-byte unchanged. + +--- + +## Phase 4: User Story 2 (Priority: P1) — Party opts in to human assistance + +**Goal**: When a party reply on a round following a `self_resolution_offered` event explicitly requests human assistance, escalate the session to the assigned solver under the new `party_requested_human` trigger. + +**Independent Test**: Per spec User Story 2 acceptance scenarios. After a session has received the cooperative invitation, an inbound buyer reply containing "necesito un humano que revise esto" results in `escalation_recommended` row with `trigger = party_requested_human` and a Phase 4 `handoff_prepared` row. + +### Implementation for User Story 2 + +- [ ] T021 [US2] Update OpenAI classifier prompt assembly in `src/reasoning/openai.rs` with two changes: + 1. **Unconditional**: include in every classifier prompt the instruction to emit `buyer_language` and `seller_language` (ISO-639-1 codes; null when undeterminable), per the schema documented in T014. This change is NOT gated on prior `self_resolution_offered` — every round needs the language codes so the runtime always has structured per-party language available, even outside the cooperative branch (cheap and keeps round 0/1 ready in case the cooperative branch fires later). + 2. 
**Conditional**: on rounds where the session has a prior `self_resolution_offered` audit row (call `session_has_self_resolution_offered` from T017, threading through the request-context plumbing the existing classifier prompt assembly already uses), append the additional instruction block from `contracts/classifier-output.md` that documents the `human_requested: bool` field with example phrases. + + The provider parser already accepts all three fields via T006; this task only adds the prompt-side requests. +- [ ] T022 [US2] Update Anthropic classifier prompt assembly in `src/reasoning/anthropic.rs` with the same two changes as T021: (1) unconditional emission of `buyer_language` / `seller_language` on every round; (2) conditional `human_requested` instruction block on rounds following a `self_resolution_offered` event. Both adapters MUST land in the same PR set so a deploy that runs the Anthropic adapter in production cannot accidentally lose the opt-in path or the language-code emission. +- [ ] T023 [US2] Add the policy short-circuit at the top of `policy::evaluate(...)` in `src/mediation/policy.rs`, BEFORE the existing classification-label dispatch: `if classification.human_requested && session_has_self_resolution_offered(conn, session_id) { return PolicyDecision::Escalate(EscalationTrigger::PartyRequestedHuman); }`. The predicate guard prevents abuse from earlier rounds and from buggy providers that emit the field where the prompt did not request it. +- [ ] T024 [US2] Verify `escalation::recommend(...)` in `src/mediation/escalation.rs` accepts the new `EscalationTrigger::PartyRequestedHuman` variant end-to-end. If the helper has any exhaustive `match` over `EscalationTrigger`, add the arm with the appropriate string serialisation; otherwise no change is needed (the existing `Display` impl on the enum carries the new variant automatically). +- [ ] T025 [US2] Create `tests/phase3_self_resolution_opt_in.rs` integration test (US2 acceptance scenarios 1–2). 
Continue from the happy-path scenario; ingest a fresh buyer reply with explicit human-assistance text; configure the `MockReasoningProvider` to return `human_requested = true` on this round (any classification label). Assert: (a) one new `escalation_recommended` audit row with `trigger = party_requested_human`; (b) one new `handoff_prepared` audit row (Phase 4 takes over from there); (c) session is in `escalation_recommended`; (d) NO second `self_resolution_offered` row is written. Then ingest a separate session where the round-N+1 reply is unrelated ("ok, esperamos") and `human_requested = false`; assert NO escalation fires. + +**Checkpoint**: User Stories 1 + 2 functional. Feature is shippable as MVP. + +--- + +## Phase 5: User Story 3 (Priority: P2) — No lock-in on cooperative branch + +**Goal**: A non-cooperative classification on a round following the cooperative invitation MUST escalate under its own standard trigger, NOT under `party_requested_human` or any cooperative-branch hold. + +**Independent Test**: Per spec User Story 3 acceptance scenario 1. After invitation, a round-N+1 classification of `ConflictingClaims` confidence 0.92 produces `Escalate(ConflictingClaims)`, not `Escalate(PartyRequestedHuman)`. + +US3 has no implementation work beyond what T016 and T023 already deliver: T016's pre-condition predicate ensures the new branch only fires for `CoordinationFailureResolvable + Summarize`, so a `ConflictingClaims` classification falls through to the existing `Escalate(ConflictingClaims)` path; T023's predicate guard requires both `human_requested == true` AND a prior `self_resolution_offered` row, so an opt-in trigger does not accidentally hijack a legitimate non-cooperative escalation. What remains is the explicit regression test. + +### Implementation for User Story 3 + +- [ ] T026 [US3] Create `tests/phase3_self_resolution_no_lock_in.rs` integration test (US3 acceptance scenario 1). 
Continue from the happy-path scenario; ingest a round-N+1 reply ("the seller never released, they lied"). Wire the `MockReasoningProvider` to return `ConflictingClaims` confidence 0.92 with `human_requested = false`. Assert: (a) one new `escalation_recommended` audit row with `trigger = conflicting_claims`; (b) NOT `party_requested_human`; (c) session in `escalation_recommended`. The cooperative branch must not bias subsequent rounds. + +**Checkpoint**: All three user stories functional and independently testable. + +--- + +## Phase 6: Polish & Cross-Cutting Concerns + +**Purpose**: Hardening, regression coverage, operator-facing documentation, and pre-merge sanity. + +- [ ] T027 [P] Create `tests/phase3_self_resolution_kill_switch.rs` integration test for SC-007 + the sub-threshold regression. Three side-by-side sessions (different dispute ids) with identical inbound round-1 transcripts and scripted classifier responses that are identical apart from the confidence value listed per session below. Run the engine on: + - **Session A**: `self_resolution_enabled = true`, `self_resolution_threshold = 0.75`, classifier returns `CoordinationFailureResolvable` confidence `0.85`. + - **Session B**: `self_resolution_enabled = false`, `self_resolution_threshold = 0.75`, classifier returns the same `CoordinationFailureResolvable` confidence `0.85` (kill-switch case). + - **Session C**: `self_resolution_enabled = true`, `self_resolution_threshold = 0.75`, classifier returns `CoordinationFailureResolvable` confidence `0.50` (sub-threshold case — backs FR-010). + + Assertions: + - Session A: contains `self_resolution_offered` row; `summary_generated` payload `suggested_next_step = "self_resolution_offered_to_parties"`. + - Session B: NO `self_resolution_offered` row; `summary_generated` payload `suggested_next_step` carries the legacy value (whatever the existing cooperative-summary path uses), NOT `"self_resolution_offered_to_parties"`.
+ - Session C: NO `self_resolution_offered` row (sub-threshold takes the legacy path); `summary_generated` payload identical to session B's. Backs the byte-for-byte regression guarantee in both kill-switch and sub-threshold dimensions. +- [ ] T028 [P] Add a startup-time health-check probe for the `human_requested` field on the configured reasoning provider per R-003 in `research.md`. Implementation: extend the existing reasoning health-check (already invoked from `daemon.rs` startup) to include a one-shot classification call whose mock transcript is crafted to elicit `human_requested = true` on the response, then assert the parsed response carries the field with that value. If the field is absent or `false`, log a warning at `warn!` level (`reasoning health-check: provider does not echo human_requested; cooperative-self-resolution opt-in path will silently fail for this provider until updated`). Do not fail the daemon — operators may intentionally run with kill-switch off until the provider catches up. +- [ ] T029 [P] Add operational structured-tracing events needed for SC-001 and SC-002 baseline measurement (R-002 in research.md). Implementation: emit two pinned `info!` events. Pinned event names (downstream tooling joins on these): + - **`cooperative_case_detected`** — emitted from the `self_resolution_offered` event-emit code path in T018, the moment Serbero decides to dispatch the cooperative invitation. Structured fields: `session_id` (string), `dispute_id` (string), `confidence` (f64), `prompt_bundle_id` (string), `buyer_language` (string-or-null), `seller_language` (string-or-null), `occurred_at_unix` (i64). + - **`cooperative_case_closed_externally`** — emitted from the `dispute_resolved` handler when the session being closed had a prior `self_resolution_offered` audit row (cheap existence check, same predicate shape as T017).
Structured fields: `session_id` (string), `dispute_id` (string), `elapsed_secs` (i64, computed as `now - first_self_resolution_offered_at`), `prompt_bundle_id` (string), `occurred_at_unix` (i64). + + Both events MUST carry the dispute id so downstream tooling can join them. The two-event pair lets the operator compute SC-001 (cooperative-case detection volume) and SC-002 (cooperative-case external-resolution rate, plus median elapsed). No new metrics endpoint; the existing `tracing` setup is the metrics surface for this codebase. +- [ ] T030 [P] Update `config.example.toml` (or whichever operator-facing example config the repo ships) with the two new `[mediation]` keys and the operator-facing comment block from `contracts/config.md`. +- [ ] T031 Run the `quickstart.md` walkthrough end-to-end against a local daemon: trigger a session that produces `CoordinationFailureResolvable` confidence ≥ 0.75 (via a real or mocked reasoning provider), verify the daemon log lines match the expected sequence, verify the `mediation_events` rows match the audit-row sequence invariant, and verify the kill-switch SC-007 recipe. +- [ ] T032 Run `cargo clippy --all-targets --all-features` and address any new lints introduced by this feature. The codebase currently runs clippy-clean; preserve that property. +- [ ] T033 Run `cargo fmt --all` to apply the existing formatter rules. +- [ ] T034 Run the full test suite (`cargo test`). Confirm all 267+ existing tests pass and the new tests from US1/US2/US3/Polish (T015, T019, T020, T025, T026, T027) pass. Zero regressions tolerated. + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Setup (Phase 1)**: No dependencies — can start immediately. +- **Foundational (Phase 2)**: Depends on Setup. **BLOCKS** all user-story phases. +- **User Story 1 (Phase 3)**: Depends on Foundational. P1 / MVP. Tasks within parallelizable per the [P] markers. 
+- **User Story 2 (Phase 4)**: Depends on Foundational AND User Story 1's T017 (`session_has_self_resolution_offered` helper is shared). T021 / T022 can run in parallel once T017 lands, followed by T023 and then T024; T025 depends on T021–T024. +- **User Story 3 (Phase 5)**: Depends on Foundational. T026 also reuses session shapes from US1, so it's easier to write after US1's T019 lands. No code dependency on US2 — US3 can ship with US1 alone if needed. +- **Polish (Phase 6)**: Depends on all desired user stories being complete. T027 is a regression test that requires T016 + T018 (US1 implementation). T028, T029, T030 are operator-side and can land in parallel with any of US1/US2. + +### Within Each User Story + +- Models / data shapes (Phase 2 tasks T003–T008) before code that depends on them. +- T017 (helper) before T016 (uses it) and T023 (uses it). +- T016 + T018 (cooperative branch + dispatch) before T019 (test that exercises them). +- T021–T024 (opt-in code) before T025 (test that exercises them). +- T015 (template audit) is independent and can land at any point after T011 + T009 ship. + +### Parallel Opportunities (within phases) + +**Phase 2 — independent files**: T003, T004, T005, T006, T007, T008 — all touch different files, all parallelizable. + +**Phase 3 — partial parallel**: T015 has no dependency on T016–T018 (different file). T017 → T016 → T018 → (T019 || T020) is the serial chain inside the implementation. + +**Phase 4 — partial parallel**: T021 || T022 (different files), then T023 (policy file), then T024 (verification, may be a no-op), then T025 (test). + +**Phase 6 — heavily parallel**: T027, T028, T029, T030 are all independent. T031 is the integration walkthrough; runs after the others. T032 / T033 / T034 are the pre-merge sanity sweep. + +--- + +## Implementation Strategy + +### MVP First (User Story 1 only) + +1. Complete Phase 1 (Setup) — small, ~30 min. +2. Complete Phase 2 (Foundational) — the bulk of the data-model + prompt-bundle work, ~1 day. +3.
Complete Phase 3 (User Story 1) — the cooperative invitation path itself. +4. **STOP and VALIDATE**: run T019 + T020 + a manual session against a local daemon. Verify the user observation that motivated this feature (party gets a Spanish acknowledgement after confirming receipt) is now resolved. +5. Optionally deploy with `self_resolution_enabled = false` for a few days, capture SC-001 / SC-002 baselines, then flip the switch. + +### Incremental Delivery (recommended) + +1. Setup + Foundational → foundation ready. +2. Add User Story 1 + the kill-switch test (T027) → ship MVP. Cooperative cases now get a templated invitation; legacy path still available behind kill-switch. +3. Add User Story 2 → ship the opt-in. Now parties have an explicit "I want a human" off-ramp. +4. Add User Story 3 (regression-only) → ship the lock-in regression test. +5. Polish (T028 / T029 / T030 / T031 / T032 / T033 / T034) — can land alongside any of the above. + +### Parallel Team Strategy + +With multiple developers: + +1. Together: Setup + Foundational (T001–T014). +2. Once Foundational is done: + - **Developer A**: User Story 1 (T015 → T017 → T016 → T018 → T019 → T020). + - **Developer B**: User Story 2 (T021 || T022, then T023, T024, T025) — depends on T017 from Developer A (sync point). + - **Developer C**: Polish — T027 (test, depends on US1), T028 (health-check), T029 (counters), T030 (config example). + +--- + +## Notes + +- [P] tasks = different files, no incomplete-task dependencies. +- [Story] label maps task to spec.md user-story id for traceability. +- Each user story is independently completable and testable. +- Verify tests pass (and that the legacy path is unchanged with kill-switch off) before merging. +- Commit per task or logical group; the speckit `after_implement` hook offers an auto-commit if you want to opt into it (see `.specify/extensions/git/git-config.yml`). +- This feature ships **zero SQL migrations**. The migration counter on `main` stays at v5 (Phase 4).
+ +--- + +## Format Validation + +All 34 tasks above conform to the strict checklist format: + +- ✅ Every line starts with `- [ ]`. +- ✅ Every task has a sequential ID (T001 through T034). +- ✅ User-story phase tasks (T015–T020 for US1, T021–T025 for US2, T026 for US3) carry the `[US1]` / `[US2]` / `[US3]` story labels. +- ✅ Setup, Foundational, and Polish phase tasks do NOT carry a story label. +- ✅ Every task description includes the exact file path it touches. +- ✅ `[P]` markers are applied only where the task is genuinely parallelizable (different file, no incomplete dependency). diff --git a/src/config.rs b/src/config.rs index 9f59c7c..5aa9ffb 100644 --- a/src/config.rs +++ b/src/config.rs @@ -14,9 +14,26 @@ pub fn load_config(path: &Path) -> Result { apply_env_overrides(&mut config); resolve_reasoning_api_key(&mut config)?; validate_escalation(&config)?; + validate_mediation(&config)?; Ok(config) } +/// Validate the `[mediation]` section for Feature 005 keys. +/// `self_resolution_threshold` is an f32 confidence floor that must +/// land in `0.0..=1.0`. Anything else would silently misbehave (e.g. +/// `1.5` would never trigger; `-0.1` would always trigger). The +/// `self_resolution_enabled` bool needs no validation. Loud failure +/// matches the pattern in `validate_escalation`. +fn validate_mediation(config: &Config) -> Result<()> { + let t = config.mediation.self_resolution_threshold; + if !(0.0..=1.0).contains(&t) || t.is_nan() { + return Err(Error::Config(format!( + "[mediation].self_resolution_threshold must be in [0.0, 1.0] (got {t})" + ))); + } + Ok(()) +} + /// Validate the `[escalation]` section. The defaults are safe, so /// most fields need no check; the one load-bearing guard is that /// `dispatch_interval_seconds == 0` would busy-loop the dispatcher. 
diff --git a/src/db/mediation_events.rs b/src/db/mediation_events.rs index 0867145..3a809ce 100644 --- a/src/db/mediation_events.rs +++ b/src/db/mediation_events.rs @@ -76,6 +76,14 @@ pub enum MediationEventKind { /// and `orphan_dispute_reference` (payload parses but the /// dispute id has no row in `disputes`). EscalationDispatchParseFailed, + /// FR-001 — emitted when Serbero sends the cooperative + /// self-resolution invitation to both parties on a high-confidence + /// `coordination_failure_resolvable` round. Once written for a + /// session, the policy branch refuses to fire again for that + /// session (one-shot guarantee per FR-006). Payload references + /// the rationale id of the producing classification so the full + /// rationale text stays out of `mediation_events` per FR-120. + SelfResolutionOffered, } impl MediationEventKind { @@ -106,6 +114,7 @@ impl MediationEventKind { EscalationSuperseded => "escalation_superseded", EscalationDispatchUnroutable => "escalation_dispatch_unroutable", EscalationDispatchParseFailed => "escalation_dispatch_parse_failed", + SelfResolutionOffered => "self_resolution_offered", } } } @@ -660,6 +669,83 @@ pub fn record_escalation_dispatch_parse_failed( ) } +/// FR-001 / FR-005 typed constructor for the cooperative +/// self-resolution invitation audit row. +/// +/// Emitted before the outbound gift-wraps publish so the audit row is +/// durable even if relay publishing fails. Payload carries the +/// per-party language codes the dispatch arm resolved (used for +/// forensic replay per `quickstart.md`); the full rationale text +/// stays in `reasoning_rationales`, referenced by `rationale_id`, +/// per FR-120. 
+#[allow(clippy::too_many_arguments)] +pub fn record_self_resolution_offered( + conn: &Connection, + session_id: &str, + rationale_id: &str, + confidence: f64, + buyer_language: Option<&str>, + seller_language: Option<&str>, + fallback_language: &str, + prompt_bundle_id: &str, + policy_hash: &str, + occurred_at: i64, +) -> Result { + let payload = json!({ + "confidence": confidence, + "languages": { + "buyer": buyer_language, + "seller": seller_language, + "fallback": fallback_language, + }, + }) + .to_string(); + record_event( + conn, + MediationEventKind::SelfResolutionOffered, + Some(session_id), + &payload, + Some(rationale_id), + Some(prompt_bundle_id), + Some(policy_hash), + occurred_at, + ) +} + +/// One-shot guard predicate (FR-006). Returns `true` iff a +/// `self_resolution_offered` audit row already exists for the given +/// session. The cooperative-self-resolution policy branch and the +/// `human_requested` short-circuit both consult this predicate so +/// the new behaviours stay scoped to sessions that have actually +/// received the invitation. +pub fn session_has_self_resolution_offered(conn: &Connection, session_id: &str) -> Result { + let n: i64 = conn.query_row( + "SELECT COUNT(*) FROM mediation_events + WHERE kind = 'self_resolution_offered' AND session_id = ?1", + params![session_id], + |r| r.get(0), + )?; + Ok(n > 0) +} + +/// Read the `occurred_at` of the *first* `self_resolution_offered` +/// row for a session, or `None` if none exists. Used by the +/// `cooperative_case_closed_externally` tracing event in +/// `dispute_resolved` to compute `elapsed_secs` per the operational +/// counters in `tasks.md` T029. 
+pub fn first_self_resolution_offered_at( + conn: &Connection, + session_id: &str, +) -> Result> { + conn.query_row( + "SELECT MIN(occurred_at) FROM mediation_events + WHERE kind = 'self_resolution_offered' AND session_id = ?1", + params![session_id], + |r| r.get::<_, Option>(0), + ) + .map_err(Into::into) +} + #[cfg(test)] mod tests { use super::*; @@ -757,6 +843,10 @@ mod tests { MediationEventKind::EscalationDispatchParseFailed, "escalation_dispatch_parse_failed", ), + ( + MediationEventKind::SelfResolutionOffered, + "self_resolution_offered", + ), ]; for (kind, want) in expected { assert_eq!(kind.as_str(), want, "kind {kind:?} string form drifted"); diff --git a/src/handlers/dispute_resolved.rs b/src/handlers/dispute_resolved.rs index a65b037..4cf0630 100644 --- a/src/handlers/dispute_resolved.rs +++ b/src/handlers/dispute_resolved.rs @@ -312,6 +312,26 @@ pub async fn handle(ctx: &HandlerContext, event: &Event) -> Result<()> { }; if let Some((session_id, pinned_bundle_id, pinned_policy_hash)) = summarized_session { + // Feature 005 / SC-002: if the session being closed + // received a cooperative-self-resolution invitation + // earlier, emit the `cooperative_case_closed_externally` + // structured trace event so operators can compute the + // external-resolution rate + median elapsed seconds. + // Best-effort lookup; a DB error here must NOT prevent + // the close from committing. 
+ if let Ok(Some(invited_at)) = + db::mediation_events::first_self_resolution_offered_at(&tx, &session_id) + { + tracing::info!( + event = "cooperative_case_closed_externally", + session_id = %session_id, + dispute_id = %dispute_id, + elapsed_secs = now.saturating_sub(invited_at), + prompt_bundle_id = %pinned_bundle_id, + occurred_at_unix = now, + "cooperative_case_closed_externally" + ); + } let closed_payload = json!({ "reason": "dispute_resolved_externally", "dispute_id": dispute_id, diff --git a/src/mediation/escalation.rs b/src/mediation/escalation.rs index e9e559c..121ddd3 100644 --- a/src/mediation/escalation.rs +++ b/src/mediation/escalation.rs @@ -64,6 +64,11 @@ const ESCALATABLE_STATES: &[&str] = &[ "classified", "follow_up_pending", "summary_pending", + // Feature 005 — `summary_delivered → escalation_recommended` is + // legal so a party reply that opts in to human assistance after + // the cooperative invitation can lift the session out of the + // post-summary state into the Phase 4 handoff queue. + "summary_delivered", ]; /// Phase 4 handoff package. Persisted as the `handoff_prepared` diff --git a/src/mediation/follow_up.rs b/src/mediation/follow_up.rs index 551c32f..a625f0c 100644 --- a/src/mediation/follow_up.rs +++ b/src/mediation/follow_up.rs @@ -78,14 +78,15 @@ use crate::error::Result; use crate::models::dispute::InitiatorRole; use crate::models::mediation::{EscalationTrigger, MediationSessionState}; use crate::models::reasoning::{ClassificationRequest, ReasoningContext}; -use crate::models::SolverConfig; +use crate::models::{MediationConfig, SolverConfig}; use crate::prompts::PromptBundle; use crate::reasoning::ReasoningProvider; use super::{ deliver_summary, draft_and_send_followup_message, escalation, notify_solvers_escalation, - policy, transcript, SessionKeyCache, + policy, self_resolution, transcript, SessionKeyCache, }; +use crate::chat::outbound; /// Hard cap on transcript rows passed to the classifier (FR-128). 
/// Guards against runaway token costs on a session that accumulates @@ -121,6 +122,7 @@ pub async fn advance_session_round( solvers: &[SolverConfig], provider_name: &str, model_name: &str, + mediation_cfg: &MediationConfig, ) -> Result<()> { // (1) Load session metadata + idempotency gate. let info = match load_session_info(conn, session_id).await? { @@ -191,10 +193,17 @@ pub async fn advance_session_round( } }; - // (4) Transcript. - let transcript_entries = { + // (4) Transcript + cooperative-invitation flag (FR-008). + // The flag drives the conditional `human_requested` + // instruction block on the classifier prompt — only sessions + // that have already received the cooperative invitation pay + // the prompt-token cost of asking the model to detect + // human-assistance requests. + let (transcript_entries, prior_self_resolution_offered) = { let guard = conn.lock().await; - transcript::load_transcript_for_session(&guard, session_id, TRANSCRIPT_CAP)? + let entries = transcript::load_transcript_for_session(&guard, session_id, TRANSCRIPT_CAP)?; + let flag = db::mediation_events::session_has_self_resolution_offered(&guard, session_id)?; + (entries, flag) }; // (5) Classify. On failure, bump + (maybe) escalate. @@ -211,6 +220,7 @@ pub async fn advance_session_round( // absent. A future slice can plumb them. last_classification: None, last_confidence: None, + session_has_self_resolution_offered: prior_self_resolution_offered, }, }; let classification = match reasoning.classify(classification_req).await { @@ -253,8 +263,9 @@ pub async fn advance_session_round( prompt_bundle, provider_name, model_name, - classification, + classification.clone(), followup_number, + mediation_cfg, ) .await { @@ -395,6 +406,120 @@ pub async fn advance_session_round( "advance_session_round: Summarize dispatched" ); } + policy::PolicyDecision::SuggestSelfResolutionWithSummary { confidence } => { + // Feature 005 dispatch: cooperative self-resolution + // invitation. 
Order of operations matches the contract + // in `specs/005-cooperative-self-resolution/contracts/audit-events.md`: + // + // 1. Resolve per-party language codes from the + // classifier's structured response. + // 2. Render each party's invitation from the static + // bundle templates. + // 3. Open a transaction: write the + // `self_resolution_offered` audit row + insert two + // `mediation_messages` rows (audience-tagged). + // 4. Commit, then publish the gift-wraps OUTSIDE the + // transaction (matches the existing initial / + // follow-up drafter pattern — failure to publish + // leaves the rows committed as a historical record). + // 5. Pre-flip `awaiting_response → classified` and call + // `deliver_summary` so the solver still receives the + // existing `mediation_summary` notification. + let buyer_lang = classification.buyer_language.as_deref(); + let seller_lang = classification.seller_language.as_deref(); + // Pull the rationale id of the producing + // classification — `policy::evaluate` already wrote it + // before returning the decision. + let rationale_id = { + let guard = conn.lock().await; + latest_classification_rationale_id(&guard, session_id)? + }; + if let Err(e) = draft_and_send_self_resolution_invitation( + conn, + client, + serbero_keys, + session_id, + confidence, + buyer_lang, + seller_lang, + &material.buyer_shared_keys, + &material.seller_shared_keys, + prompt_bundle, + rationale_id.as_deref(), + ) + .await + { + warn!( + error = %e, + "advance_session_round: self-resolution invitation drafter failed" + ); + handle_reasoning_failure( + conn, + client, + session_id, + &info.dispute_id, + solvers, + prompt_bundle, + ) + .await; + return Ok(()); + } + // Pre-flip awaiting_response → classified so + // `deliver_summary`'s `classified → summary_pending` is + // a legal transition. Same pattern as the legacy + // Summarize arm above. 
+ { + let guard = conn.lock().await; + db::mediation::set_session_state( + &guard, + session_id, + MediationSessionState::Classified, + super::current_ts_secs()?, + )?; + } + if let Err(e) = deliver_summary( + conn, + client, + serbero_keys, + session_id, + &info.dispute_id, + crate::models::mediation::ClassificationLabel::CoordinationFailureResolvable, + confidence, + transcript_entries, + prompt_bundle, + reasoning, + solvers, + provider_name, + model_name, + ) + .await + { + warn!( + error = %e, + "advance_session_round: deliver_summary after self-resolution invitation failed" + ); + handle_reasoning_failure( + conn, + client, + session_id, + &info.dispute_id, + solvers, + prompt_bundle, + ) + .await; + return Ok(()); + } + let new_marker = total_fresh_inbounds; + let mut guard = conn.lock().await; + let tx = guard.transaction()?; + db::mediation::advance_evaluator_marker(&tx, session_id, new_marker)?; + tx.commit()?; + info!( + confidence, + round_count_marked = new_marker, + "advance_session_round: SuggestSelfResolutionWithSummary dispatched" + ); + } policy::PolicyDecision::Escalate(trigger) => { if let Err(e) = escalation::recommend(escalation::RecommendParams { conn, @@ -436,6 +561,175 @@ pub async fn advance_session_round( Ok(()) } +/// Latest `classification_produced` rationale id for a session. +/// Used by the cooperative-self-resolution dispatch arm to populate +/// the `self_resolution_offered` audit row's `rationale_id` column — +/// the policy layer wrote the row a moment earlier, so this lookup +/// always succeeds in practice. Returns `None` defensively (older +/// sessions / missing audit) so the caller can still proceed +/// without a rationale reference rather than panic. 
+fn latest_classification_rationale_id( + conn: &rusqlite::Connection, + session_id: &str, +) -> Result> { + let row = conn.query_row( + "SELECT rationale_id FROM mediation_events + WHERE session_id = ?1 AND kind = 'classification_produced' AND rationale_id IS NOT NULL + ORDER BY occurred_at DESC, id DESC LIMIT 1", + params![session_id], + |r| r.get::<_, Option>(0), + ); + match row { + Ok(opt) => Ok(opt), + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), + Err(e) => Err(crate::error::Error::Db(e)), + } +} + +/// Feature 005 — write the cooperative-self-resolution invitation +/// gift-wraps + audit row. Patterned on +/// [`super::draft_and_send_followup_message`]: opens a single +/// transaction for both `mediation_messages` rows + the +/// `self_resolution_offered` audit row, commits, then publishes the +/// gift-wraps OUTSIDE the transaction. A relay-side publish failure +/// after commit leaves the rows in place as historical record — +/// matches the existing drafter discipline (FR-126 Non-Goals). 
+#[allow(clippy::too_many_arguments)] +async fn draft_and_send_self_resolution_invitation( + conn: &Arc>, + client: &Client, + serbero_keys: &Keys, + session_id: &str, + confidence: f64, + buyer_language: Option<&str>, + seller_language: Option<&str>, + buyer_shared_keys: &Keys, + seller_shared_keys: &Keys, + prompt_bundle: &Arc, + rationale_id: Option<&str>, +) -> Result<()> { + use crate::models::mediation::TranscriptParty; + + let buyer_msg = self_resolution::render_for(buyer_language, &prompt_bundle.self_resolution); + let seller_msg = self_resolution::render_for(seller_language, &prompt_bundle.self_resolution); + + let buyer_wrap = outbound::build_wrap_with_audience( + serbero_keys, + &buyer_shared_keys.public_key(), + &buyer_msg, + Some("buyer"), + ) + .await?; + let seller_wrap = outbound::build_wrap_with_audience( + serbero_keys, + &seller_shared_keys.public_key(), + &seller_msg, + Some("seller"), + ) + .await?; + + if buyer_wrap.inner_event_id == seller_wrap.inner_event_id { + return Err(crate::error::Error::ChatTransport( + "inner event ids collided across parties on cooperative invitation; refusing to \ + persist rows that would violate the dedup invariant" + .into(), + )); + } + + let buyer_shared_pubkey_hex = buyer_shared_keys.public_key().to_hex(); + let seller_shared_pubkey_hex = seller_shared_keys.public_key().to_hex(); + let buyer_inner_id_hex = buyer_wrap.inner_event_id.to_hex(); + let seller_inner_id_hex = seller_wrap.inner_event_id.to_hex(); + let now = super::current_ts_secs()?; + + { + let mut guard = conn.lock().await; + let tx = guard.transaction()?; + db::mediation::insert_outbound_message( + &tx, + &db::mediation::NewOutboundMessage { + session_id, + party: TranscriptParty::Buyer, + shared_pubkey: &buyer_shared_pubkey_hex, + inner_event_id: &buyer_inner_id_hex, + inner_event_created_at: buyer_wrap.inner_created_at, + outer_event_id: Some(&buyer_wrap.outer.id.to_hex()), + content: &buyer_msg, + prompt_bundle_id: &prompt_bundle.id, + 
policy_hash: &prompt_bundle.policy_hash, + persisted_at: now, + }, + )?; + db::mediation::insert_outbound_message( + &tx, + &db::mediation::NewOutboundMessage { + session_id, + party: TranscriptParty::Seller, + shared_pubkey: &seller_shared_pubkey_hex, + inner_event_id: &seller_inner_id_hex, + inner_event_created_at: seller_wrap.inner_created_at, + outer_event_id: Some(&seller_wrap.outer.id.to_hex()), + content: &seller_msg, + prompt_bundle_id: &prompt_bundle.id, + policy_hash: &prompt_bundle.policy_hash, + persisted_at: now, + }, + )?; + // Self-resolution audit row. The `rationale_id` is the + // producing classification's content hash; the `confidence` + // and per-party language codes go into the structured + // payload so a forensic replay can reconstruct exactly which + // template section each party received. + db::mediation_events::record_self_resolution_offered( + &tx, + session_id, + rationale_id.unwrap_or(""), + confidence, + buyer_language, + seller_language, + &prompt_bundle.self_resolution.fallback_language, + &prompt_bundle.id, + &prompt_bundle.policy_hash, + now, + )?; + tx.commit()?; + } + + // Operational tracing for SC-001 baseline (T029). 
+ let bid_for_log = prompt_bundle.id.clone(); + info!( + event = "cooperative_case_detected", + session_id = %session_id, + confidence, + prompt_bundle_id = %bid_for_log, + buyer_language = buyer_language.unwrap_or("(none)"), + seller_language = seller_language.unwrap_or("(none)"), + occurred_at_unix = now, + "cooperative_case_detected" + ); + + super::session::publish_with_bounded_retry(client, &buyer_wrap.outer, "buyer").await?; + super::record_outbound_sent_audit( + conn, + session_id, + &buyer_shared_pubkey_hex, + &buyer_inner_id_hex, + prompt_bundle, + ) + .await?; + super::session::publish_with_bounded_retry(client, &seller_wrap.outer, "seller").await?; + super::record_outbound_sent_audit( + conn, + session_id, + &seller_shared_pubkey_hex, + &seller_inner_id_hex, + prompt_bundle, + ) + .await?; + + Ok(()) +} + /// One read of everything `advance_session_round` needs from the /// session row. Batched into a single SELECT so the async mutex /// lock is held for one query rather than four. 
@@ -631,6 +925,7 @@ mod tests { escalation: String::new(), mediation_style: String::new(), message_templates: String::new(), + self_resolution: crate::mediation::self_resolution::SelfResolutionTemplates::default(), }) } @@ -767,6 +1062,7 @@ mod tests { &[], "mock-provider", "mock-model", + &MediationConfig::default(), ) .await .unwrap(); diff --git a/src/mediation/mod.rs b/src/mediation/mod.rs index 4e641e2..c5b44c4 100644 --- a/src/mediation/mod.rs +++ b/src/mediation/mod.rs @@ -25,6 +25,7 @@ pub mod follow_up; pub mod policy; pub mod report; pub mod router; +pub mod self_resolution; pub mod session; pub mod start; pub mod summarizer; @@ -2022,6 +2023,7 @@ async fn run_ingest_tick( solvers, provider_name, model_name, + mediation_cfg, ) .await .unwrap_or_else(|e| { diff --git a/src/mediation/policy.rs b/src/mediation/policy.rs index b2d443e..9e25e98 100644 --- a/src/mediation/policy.rs +++ b/src/mediation/policy.rs @@ -35,10 +35,11 @@ use tracing::{debug, warn}; use crate::db; use crate::error::Result; use crate::models::dispute::InitiatorRole; -use crate::models::mediation::{EscalationTrigger, Flag}; +use crate::models::mediation::{ClassificationLabel, EscalationTrigger, Flag}; use crate::models::reasoning::{ ClassificationRequest, ClassificationResponse, ReasoningContext, SuggestedAction, }; +use crate::models::MediationConfig; use crate::prompts::PromptBundle; use crate::reasoning::ReasoningProvider; @@ -68,6 +69,21 @@ pub enum PolicyDecision { classification: crate::models::mediation::ClassificationLabel, confidence: f64, }, + /// FR-001 — cooperative self-resolution invitation. + /// + /// Triggered when the model classifies a session as + /// `coordination_failure_resolvable` with `suggested_action = + /// summarize` AND the configured confidence floor is met AND the + /// kill-switch is on AND no prior `self_resolution_offered` audit + /// row exists for the session. 
The dispatch arm sends a + /// per-party templated invitation in each detected language + /// (with a human-assistance opt-in sentence) AND delivers the + /// existing solver summary with `suggested_next_step = + /// "self_resolution_offered_to_parties"`. When any of the + /// pre-conditions fails, the legacy `Summarize` branch fires + /// instead — preserving byte-for-byte legacy behaviour with the + /// kill-switch off (SC-007). + SuggestSelfResolutionWithSummary { confidence: f64 }, /// Escalate to a human solver with the given trigger. The /// mediation engine translates this into a Phase 4 handoff. Escalate(EscalationTrigger), @@ -116,6 +132,7 @@ pub async fn initial_classification( round_count: 0, last_classification: None, last_confidence: None, + session_has_self_resolution_offered: false, }, }; @@ -235,10 +252,12 @@ pub async fn evaluate( model_name: &str, classification: ClassificationResponse, followup_number: u32, + mediation_cfg: &MediationConfig, ) -> Result { - let decision = - classify_to_decision(&classification, PolicyRound::MidSession { followup_number }); - + // Persist the audit trail FIRST so any returned decision is + // already durable. Identical to the legacy shape — the new + // branches below only choose a different variant; they do not + // skip the rationale row. let rationale_id = persist_classification_audit( conn, session_id, @@ -249,6 +268,63 @@ pub async fn evaluate( ) .await?; + // Predicate guard for both Feature 005 branches. + let prior_offered = { + let guard = conn.lock().await; + db::mediation_events::session_has_self_resolution_offered(&guard, session_id)? + }; + + // Branch 1 (FR-008, T023): explicit human-assistance request + // after the cooperative invitation. Short-circuits the + // classification-label dispatch so a `conflicting_claims` round + // that ALSO carries `human_requested = true` still escalates as + // `PartyRequestedHuman` — the party's explicit ask wins. 
The + // predicate guard scopes the short-circuit to sessions that + // actually received the invitation, so an adversarial party + // cannot use the field to skip mediation on round 0 and a buggy + // provider that emits the field where the prompt did not request + // it cannot accidentally trigger the path. + if classification.human_requested && prior_offered { + debug!( + session_id = %session_id, + rationale_id = %rationale_id, + "evaluate: human_requested + prior self_resolution_offered → escalate(party_requested_human)" + ); + return Ok(PolicyDecision::Escalate( + EscalationTrigger::PartyRequestedHuman, + )); + } + + let base_decision = + classify_to_decision(&classification, PolicyRound::MidSession { followup_number }); + + // Branch 2 (FR-001 / FR-006 / FR-010 / FR-011, T016): cooperative + // self-resolution rewrite. Triggered when the legacy + // `classify_to_decision` would have returned a cooperative + // `Summarize` AND every feature pre-condition holds. When any + // pre-condition fails the legacy decision passes through + // unchanged — preserving byte-for-byte legacy behaviour with the + // kill-switch off (SC-007). 
+ let decision = match base_decision { + PolicyDecision::Summarize { + classification: ClassificationLabel::CoordinationFailureResolvable, + confidence, + } if mediation_cfg.self_resolution_enabled + && (confidence as f32) >= mediation_cfg.self_resolution_threshold + && !prior_offered => + { + debug!( + session_id = %session_id, + rationale_id = %rationale_id, + confidence, + threshold = mediation_cfg.self_resolution_threshold, + "evaluate: cooperative self-resolution branch eligible → SuggestSelfResolutionWithSummary" + ); + PolicyDecision::SuggestSelfResolutionWithSummary { confidence } + } + other => other, + }; + debug!( session_id = %session_id, classification = %classification.classification, @@ -510,6 +586,7 @@ pub async fn classify_for_start( round_count: 0, last_classification: None, last_confidence: None, + session_has_self_resolution_offered: false, }, }; @@ -756,6 +833,7 @@ mod tests { escalation: "esc".into(), mediation_style: "style".into(), message_templates: "tpl".into(), + self_resolution: crate::mediation::self_resolution::SelfResolutionTemplates::default(), }) } @@ -769,6 +847,9 @@ mod tests { }, rationale: RationaleText("rationale body".into()), flags: Vec::new(), + human_requested: false, + buyer_language: None, + seller_language: None, } } @@ -1059,6 +1140,16 @@ mod tests { // event) without going through a reasoning-provider stub. // ------------------------------------------------------------------ + /// Disabled cooperative-self-resolution branch so legacy tests + /// preserve their previous semantics. Cooperative-branch + /// behavior is exercised in dedicated tests further down. 
+ fn legacy_mediation_cfg() -> MediationConfig { + MediationConfig { + self_resolution_enabled: false, + ..MediationConfig::default() + } + } + async fn run_evaluate( conn: &Arc>, classification: ClassificationResponse, @@ -1084,6 +1175,27 @@ mod tests { "gpt-test", classification, followup_number, + &legacy_mediation_cfg(), + ) + .await + } + + async fn run_evaluate_with_cfg( + conn: &Arc>, + classification: ClassificationResponse, + followup_number: u32, + cfg: &MediationConfig, + ) -> Result { + let bundle = test_bundle(); + evaluate( + conn, + "sess-policy", + &bundle, + "openai", + "gpt-test", + classification, + followup_number, + cfg, ) .await } @@ -1261,4 +1373,178 @@ mod tests { assert_eq!(rat_count, 1, "rationale audit row expected"); assert_eq!(evt_count, 1, "classification_produced event expected"); } + + // ------------------------------------------------------------------ + // Feature 005 — cooperative self-resolution branch (T016) + + // human-assistance opt-in short-circuit (T023). Tests pin the + // pre-condition logic so a future refactor can't accidentally + // weaken the kill-switch / threshold / one-shot guards. 
+ // ------------------------------------------------------------------ + + fn enabled_cooperative_cfg(threshold: f32) -> MediationConfig { + MediationConfig { + self_resolution_enabled: true, + self_resolution_threshold: threshold, + ..MediationConfig::default() + } + } + + fn cooperative_summary_response(confidence: f64) -> ClassificationResponse { + let mut resp = base_response(); + resp.classification = ClassificationLabel::CoordinationFailureResolvable; + resp.suggested_action = SuggestedAction::Summarize; + resp.confidence = confidence; + resp + } + + async fn seed_self_resolution_offered_row(conn: &Arc>) { + let guard = conn.lock().await; + guard + .execute( + "INSERT INTO mediation_events ( + session_id, kind, payload_json, occurred_at + ) VALUES ('sess-policy', 'self_resolution_offered', '{}', 50)", + [], + ) + .unwrap(); + } + + #[tokio::test] + async fn evaluate_cooperative_branch_fires_when_all_preconditions_hold() { + let conn = fresh_conn(); + let cfg = enabled_cooperative_cfg(0.75); + let resp = cooperative_summary_response(0.85); + let decision = run_evaluate_with_cfg(&conn, resp, 4, &cfg).await.unwrap(); + assert_eq!( + decision, + PolicyDecision::SuggestSelfResolutionWithSummary { confidence: 0.85 }, + ); + } + + #[tokio::test] + async fn evaluate_cooperative_branch_inclusive_at_threshold() { + // FR-010 / R-007: the threshold check is `>=` so a confidence + // exactly equal to the configured floor still fires. 
+ let conn = fresh_conn(); + let cfg = enabled_cooperative_cfg(0.75); + let resp = cooperative_summary_response(0.75); + let decision = run_evaluate_with_cfg(&conn, resp, 4, &cfg).await.unwrap(); + assert_eq!( + decision, + PolicyDecision::SuggestSelfResolutionWithSummary { confidence: 0.75 }, + ); + } + + #[tokio::test] + async fn evaluate_cooperative_branch_falls_through_below_threshold() { + let conn = fresh_conn(); + let cfg = enabled_cooperative_cfg(0.90); + let resp = cooperative_summary_response(0.80); + let decision = run_evaluate_with_cfg(&conn, resp, 4, &cfg).await.unwrap(); + // Falls through to the legacy cooperative-summary path. + assert_eq!( + decision, + PolicyDecision::Summarize { + classification: ClassificationLabel::CoordinationFailureResolvable, + confidence: 0.80, + }, + ); + } + + #[tokio::test] + async fn evaluate_cooperative_branch_skipped_when_kill_switch_off() { + // SC-007: with `self_resolution_enabled = false`, the legacy + // cooperative-summary path runs unchanged. + let conn = fresh_conn(); + let cfg = MediationConfig { + self_resolution_enabled: false, + self_resolution_threshold: 0.75, + ..MediationConfig::default() + }; + let resp = cooperative_summary_response(0.99); + let decision = run_evaluate_with_cfg(&conn, resp, 4, &cfg).await.unwrap(); + assert_eq!( + decision, + PolicyDecision::Summarize { + classification: ClassificationLabel::CoordinationFailureResolvable, + confidence: 0.99, + }, + ); + } + + #[tokio::test] + async fn evaluate_cooperative_branch_one_shot_guard() { + // FR-006 / SC-006: a session that already has a + // `self_resolution_offered` audit row MUST NOT receive a + // second invitation; the legacy summarize path takes over. 
+ let conn = fresh_conn(); + seed_self_resolution_offered_row(&conn).await; + let cfg = enabled_cooperative_cfg(0.75); + let resp = cooperative_summary_response(0.95); + let decision = run_evaluate_with_cfg(&conn, resp, 4, &cfg).await.unwrap(); + assert_eq!( + decision, + PolicyDecision::Summarize { + classification: ClassificationLabel::CoordinationFailureResolvable, + confidence: 0.95, + }, + ); + } + + #[tokio::test] + async fn evaluate_human_requested_short_circuit_after_invitation() { + // FR-008 / T023: an explicit human-assistance request after + // the cooperative invitation escalates regardless of the + // round's classification label. + let conn = fresh_conn(); + seed_self_resolution_offered_row(&conn).await; + let cfg = enabled_cooperative_cfg(0.75); + let mut resp = base_response(); + resp.human_requested = true; + resp.classification = ClassificationLabel::ConflictingClaims; // would normally escalate, but trigger differs + let decision = run_evaluate_with_cfg(&conn, resp, 4, &cfg).await.unwrap(); + assert_eq!( + decision, + PolicyDecision::Escalate(EscalationTrigger::PartyRequestedHuman), + ); + } + + #[tokio::test] + async fn evaluate_human_requested_ignored_without_prior_invitation() { + // Defence in depth: even if a buggy provider sets + // `human_requested = true` on a round where the prompt did + // not request it, the policy must NOT escalate as + // `PartyRequestedHuman` unless a prior `self_resolution_offered` + // row exists for the session. 
+ let conn = fresh_conn(); + let cfg = enabled_cooperative_cfg(0.75); + let mut resp = base_response(); + resp.human_requested = true; // no seeded self_resolution_offered row + let decision = run_evaluate_with_cfg(&conn, resp, 4, &cfg).await.unwrap(); + assert_eq!( + decision, + PolicyDecision::AskClarification { + buyer_text: "please confirm X (buyer)".into(), + seller_text: "please confirm X (seller)".into(), + }, + ); + } + + #[tokio::test] + async fn evaluate_no_lock_in_after_invitation_for_non_cooperative_label() { + // US3: a non-cooperative classification on a round following + // the cooperative invitation MUST escalate under its own + // standard trigger, NOT under PartyRequestedHuman. + let conn = fresh_conn(); + seed_self_resolution_offered_row(&conn).await; + let cfg = enabled_cooperative_cfg(0.75); + let mut resp = base_response(); + resp.classification = ClassificationLabel::ConflictingClaims; + resp.flags = vec![Flag::ConflictingClaims]; + let decision = run_evaluate_with_cfg(&conn, resp, 4, &cfg).await.unwrap(); + assert_eq!( + decision, + PolicyDecision::Escalate(EscalationTrigger::ConflictingClaims), + ); + } } diff --git a/src/mediation/self_resolution.rs b/src/mediation/self_resolution.rs new file mode 100644 index 0000000..bc7f39a --- /dev/null +++ b/src/mediation/self_resolution.rs @@ -0,0 +1,194 @@ +//! Cooperative self-resolution invitation templates and renderer +//! (Feature 005). +//! +//! Mirrors `specs/005-cooperative-self-resolution/contracts/template-bundle.md`. +//! The party-facing text Serbero sends on a high-confidence +//! `coordination_failure_resolvable` round comes from this module — +//! the LLM only decides *whether* to fire the branch, never *what* +//! to say. Static templates avoid two failure modes the spec calls +//! out: +//! +//! 1. Prompt-injection attempts to coax fund-action wording cannot +//! succeed because the templates never round-trip through the +//! model. +//! 2. 
use std::collections::HashMap;

/// One language entry in the self-resolution bundle.
///
/// The two fields are rendered verbatim by [`render_for`]; nothing in
/// this module rewrites or post-processes them.
#[derive(Debug, Clone)]
pub struct SelfResolutionLanguageEntry {
    /// The neutral coordination invitation. MUST NOT name a
    /// fund-moving action (FR-004).
    pub template: String,
    /// One sentence offering human assistance. Appended to `template`
    /// with a single separating space by [`render_for`].
    pub human_assistance_optin: String,
}

/// All language entries in the bundle, keyed by lowercase language
/// code (`"en"`, `"es"`, ...).
///
/// `fallback_language` is expected to be a key of `by_language`; when
/// it is not, [`SelfResolutionTemplates::entry_for`] can return `None`
/// and [`render_for`] emits an operator-facing placeholder.
#[derive(Debug, Clone)]
pub struct SelfResolutionTemplates {
    pub by_language: HashMap<String, SelfResolutionLanguageEntry>,
    pub fallback_language: String,
}

impl Default for SelfResolutionTemplates {
    /// Empty template map with `fallback_language = "en"`.
    ///
    /// Because the map is empty, every lookup on the default value
    /// misses, so this is only suitable for fixtures that never render
    /// an invitation.
    fn default() -> Self {
        Self {
            by_language: HashMap::new(),
            fallback_language: "en".to_string(),
        }
    }
}

impl SelfResolutionTemplates {
    /// Look up the entry for `language_code`.
    ///
    /// Resolution order:
    ///
    /// 1. the trimmed, ASCII-lowercased code as given (`" ES "` -> `es`);
    /// 2. its primary subtag, i.e. the part before the first `-` or `_`
    ///    (`"es-MX"` / `"pt_BR"` -> `es` / `pt`), so a classifier that
    ///    emits a region-qualified tag still gets the party's language
    ///    instead of silently dropping to the fallback;
    /// 3. the configured `fallback_language`.
    ///
    /// Returns `None` only when none of the three resolve, which
    /// includes the case where the bundle has no entry for its own
    /// fallback language.
    pub fn entry_for(&self, language_code: Option<&str>) -> Option<&SelfResolutionLanguageEntry> {
        language_code
            .and_then(|code| {
                let normalized = code.trim().to_ascii_lowercase();
                self.by_language.get(&normalized).or_else(|| {
                    // `split` always yields at least one item, so for a
                    // code without a separator this simply repeats the
                    // exact lookup above and misses again.
                    normalized
                        .split(|c: char| c == '-' || c == '_')
                        .next()
                        .and_then(|primary| self.by_language.get(primary))
                })
            })
            .or_else(|| self.by_language.get(&self.fallback_language))
    }
}

/// Render the per-party invitation for the given language code.
///
/// `language_code` is resolved through
/// [`SelfResolutionTemplates::entry_for`], so `None`, an unknown code,
/// or a blank string all fall back to `templates.fallback_language`.
///
/// The output is `"{template} {human_assistance_optin}"` (a single
/// space between the two halves). The function is pure: the same
/// bundle and code always produce the same string.
///
/// When no entry can be resolved at all, a bracketed placeholder
/// addressed to the operator is returned instead of panicking.
pub fn render_for(language_code: Option<&str>, templates: &SelfResolutionTemplates) -> String {
    match templates.entry_for(language_code) {
        Some(entry) => format!("{} {}", entry.template, entry.human_assistance_optin),
        None => {
            // Structurally invalid bundle: not even the fallback
            // language has an entry. Return a deliberately conspicuous
            // message rather than an empty string or a panic so the
            // breakage is visible wherever the rendered body ends up.
            String::from(
                "[serbero: self-resolution template bundle is missing the configured fallback language; \
                 please ask the operator to verify prompts/phase3-self-resolution.md]",
            )
        }
    }
}
Parece que podrían coordinar el siguiente paso entre ustedes.".into(), + human_assistance_optin: "Si prefieres asistencia humana, dímelo y te conecto con la persona asignada.".into(), + }, + ); + SelfResolutionTemplates { + by_language: by, + fallback_language: "en".into(), + } + } + + #[test] + fn render_known_language() { + let bundle = fixture_bundle(); + let out = render_for(Some("es"), &bundle); + assert!(out.starts_with("Gracias")); + assert!(out.contains("asistencia humana")); + } + + #[test] + fn render_falls_back_when_language_unknown() { + let bundle = fixture_bundle(); + let out = render_for(Some("de"), &bundle); + assert!(out.starts_with("Thanks for the update")); + } + + #[test] + fn render_falls_back_when_language_none() { + let bundle = fixture_bundle(); + let out = render_for(None, &bundle); + assert!(out.starts_with("Thanks for the update")); + } + + #[test] + fn render_normalizes_case_and_whitespace() { + let bundle = fixture_bundle(); + let upper = render_for(Some("ES"), &bundle); + let padded = render_for(Some(" es "), &bundle); + let lower = render_for(Some("es"), &bundle); + assert_eq!(upper, lower); + assert_eq!(padded, lower); + } + + #[test] + fn render_returns_placeholder_when_bundle_lacks_fallback() { + let bundle = SelfResolutionTemplates { + by_language: HashMap::new(), + fallback_language: "en".into(), + }; + let out = render_for(Some("en"), &bundle); + assert!(out.starts_with("[serbero:")); + } + + #[test] + fn entry_for_returns_fallback_when_code_absent() { + let bundle = fixture_bundle(); + let entry = bundle + .entry_for(Some("xyz")) + .expect("fallback entry must exist"); + assert!(entry.template.starts_with("Thanks")); + } +} diff --git a/src/mediation/session.rs b/src/mediation/session.rs index 0ba4dbc..ccae435 100644 --- a/src/mediation/session.rs +++ b/src/mediation/session.rs @@ -294,6 +294,23 @@ pub async fn open_session(params: OpenSessionParams<'_>) -> Result "decision": "escalate", "trigger": trigger.to_string(), }), + // 
The cooperative self-resolution branch (Feature 005) + // is only added to `policy::evaluate(...)` (mid-session) + // — never to `classify_for_start`, so the opening round + // has no code path that constructs this variant (the + // one-shot guard there requires NO prior offered row). + // Reaching this arm would mean the policy layer + // misclassified an opening-round decision; treat + // defensively as a `summarize` audit shape. + PolicyDecision::SuggestSelfResolutionWithSummary { confidence } => { + serde_json::json!({ + "dispute_id": params.dispute_id, + "decision": "summarize", + "classification": "coordination_failure_resolvable", + "confidence": confidence, + "note": "unexpected_cooperative_branch_on_opening_round", + }) + } }; let guard = params.conn.lock().await; if let Err(e) = db::mediation_events::record_event( @@ -619,6 +636,41 @@ pub async fn open_session(params: OpenSessionParams<'_>) -> Result confidence, }) } + // Feature 005: defensive arm. The cooperative branch is only + // installed in `policy::evaluate(...)` (mid-session); the + // `classify_for_start` path will never produce this variant + // because that branch lives solely in `evaluate`, whose + // one-shot guard requires that NO `self_resolution_offered` + // audit row exists yet. If we ever do reach + // this arm, route to the legacy cooperative-summary outcome + // so the engine still makes forward progress. 
+ PolicyDecision::SuggestSelfResolutionWithSummary { confidence } => { + let now = current_ts_secs()?; + { + let guard = params.conn.lock().await; + db::mediation::set_session_state( + &guard, + &session_id, + crate::models::mediation::MediationSessionState::Classified, + now, + )?; + } + if let Some(cache) = params.session_key_cache { + register_session_material(cache, &session_id, material.clone()).await; + } + warn!( + session_id = %session_id, + confidence, + "classify_for_start unexpectedly returned cooperative-self-resolution \ + branch on opening round; treating as legacy cooperative summary" + ); + Ok(OpenOutcome::ReadyForSummary { + session_id, + classification: + crate::models::mediation::ClassificationLabel::CoordinationFailureResolvable, + confidence, + }) + } PolicyDecision::Escalate(_) => { // Unreachable: the FR-122 flow above handles the // Escalate verdict before the take step and returns @@ -1006,6 +1058,7 @@ mod tests { escalation: "esc".into(), mediation_style: "style".into(), message_templates: "tpl".into(), + self_resolution: crate::mediation::self_resolution::SelfResolutionTemplates::default(), }) } diff --git a/src/models/config.rs b/src/models/config.rs index 9b60e1e..6e69fa5 100644 --- a/src/models/config.rs +++ b/src/models/config.rs @@ -122,6 +122,20 @@ pub struct MediationConfig { pub solver_auth_retry_max_total_seconds: u64, #[serde(default = "default_solver_auth_retry_max_attempts")] pub solver_auth_retry_max_attempts: u32, + + // --- Feature 005 (cooperative self-resolution) --- + /// Confidence floor at which Serbero invites parties to coordinate + /// the resolution among themselves on a + /// `coordination_failure_resolvable` classification. Range + /// `0.0..=1.0`; validated at load time. FR-010. + #[serde(default = "default_self_resolution_threshold")] + pub self_resolution_threshold: f32, + /// Master kill-switch for the cooperative self-resolution + /// branch. 
/// Serde default for `MediationConfig::self_resolution_threshold`:
/// the confidence floor used when the operator does not set one.
fn default_self_resolution_threshold() -> f32 {
    const DEFAULT_THRESHOLD: f32 = 0.75;
    DEFAULT_THRESHOLD
}

/// Serde default for `MediationConfig::self_resolution_enabled`:
/// the cooperative self-resolution branch is on unless the operator
/// explicitly turns the kill-switch off.
fn default_self_resolution_enabled() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
NotificationFailed, + /// FR-008 — a party explicitly requested human assistance after + /// the cooperative self-resolution invitation was sent. The + /// classifier flag short-circuits `policy::evaluate` to + /// `Escalate(PartyRequestedHuman)` regardless of the round's + /// classification label, so the assigned solver picks up the + /// case via the existing Phase 4 escalation pipeline. + PartyRequestedHuman, } impl fmt::Display for EscalationTrigger { @@ -149,6 +166,7 @@ impl fmt::Display for EscalationTrigger { PolicyBundleMissing => "policy_bundle_missing", InvalidModelOutput => "invalid_model_output", NotificationFailed => "notification_failed", + PartyRequestedHuman => "party_requested_human", }; f.write_str(s) } @@ -265,6 +283,11 @@ mod tests { assert!(SummaryPending.can_transition_to(SummaryDelivered)); assert!(SummaryDelivered.can_transition_to(Closed)); assert!(Opening.can_transition_to(EscalationRecommended)); + // Cooperative-self-resolution opt-in: a party reply that + // explicitly asks for a human after the invitation must be + // able to lift the session out of `summary_delivered` into + // `escalation_recommended` (FR-008). + assert!(SummaryDelivered.can_transition_to(EscalationRecommended)); assert!(EscalationRecommended.can_transition_to(Closed)); assert!(AwaitingResponse.can_transition_to(SupersededByHuman)); assert!(SupersededByHuman.can_transition_to(Closed)); @@ -278,6 +301,9 @@ mod tests { assert!(!AwaitingResponse.can_transition_to(AwaitingResponse)); // self assert!(!Classified.can_transition_to(Opening)); assert!(!EscalationRecommended.can_transition_to(AwaitingResponse)); + // The cooperative-opt-in edge is one-way: an escalated + // session never falls back into `summary_delivered`. 
+ assert!(!EscalationRecommended.can_transition_to(SummaryDelivered)); } #[test] diff --git a/src/models/reasoning.rs b/src/models/reasoning.rs index 2b75598..faae480 100644 --- a/src/models/reasoning.rs +++ b/src/models/reasoning.rs @@ -22,11 +22,18 @@ pub struct TranscriptEntry { } /// Context shared across reasoning calls in the same session. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct ReasoningContext { pub round_count: u32, pub last_classification: Option, pub last_confidence: Option, + /// FR-008. `true` iff a `self_resolution_offered` audit row + /// already exists for this session. Drives the conditional + /// `human_requested` instruction block in the classifier prompt: + /// when true, the prompt asks the model to flag explicit + /// human-assistance requests so `policy::evaluate` can + /// short-circuit to `Escalate(PartyRequestedHuman)`. + pub session_has_self_resolution_offered: bool, } /// Classification request. @@ -88,6 +95,12 @@ impl fmt::Debug for RationaleText { } /// Classification response. +/// +/// The three optional language / opt-in fields below are additive +/// extensions for Feature 005 (cooperative self-resolution). Adapters +/// that haven't been updated to emit them simply leave the defaults +/// (`false` / `None`); the policy short-circuit and the dispatch +/// arm both handle missing values gracefully. #[derive(Debug, Clone)] pub struct ClassificationResponse { pub classification: ClassificationLabel, @@ -95,6 +108,24 @@ pub struct ClassificationResponse { pub suggested_action: SuggestedAction, pub rationale: RationaleText, pub flags: Vec, + /// FR-008. Set by the classifier when a party reply explicitly + /// asks for human assistance. 
Honoured by `policy::evaluate` only + /// after a `self_resolution_offered` audit row exists for the + /// session — that predicate guard prevents an adversarial party + /// from skipping mediation by emitting human-assistance phrasing + /// on round 0 and prevents a buggy provider from triggering + /// escalation on rounds the prompt did not request the field. + pub human_requested: bool, + /// FR-002. ISO-639-1 code (`"en"`, `"es"`, `"pt"`, …) emitted by + /// the classifier alongside `buyer_clarification`. `None` when + /// the latest reply has no buyer content or is too short to + /// disambiguate. Consumed by the cooperative-self-resolution + /// dispatch arm to pick the right `[xx]` template section; falls + /// back to `bundle.self_resolution.fallback_language` when + /// absent. + pub buyer_language: Option, + /// FR-002. Same shape as `buyer_language`, for the seller. + pub seller_language: Option, } /// Summary request. diff --git a/src/prompts/hash.rs b/src/prompts/hash.rs index 0901d65..a43552e 100644 --- a/src/prompts/hash.rs +++ b/src/prompts/hash.rs @@ -35,6 +35,39 @@ pub fn policy_hash( hex_lower(&digest) } +/// Same shape as [`policy_hash`], plus the cooperative +/// self-resolution bundle bytes appended as a sixth segment. +/// Feature 005 ships its templates as a separate prompt file; the +/// hash MUST extend over those bytes so a forensic replay can pin +/// the exact rendered string per session. 
+pub fn policy_hash_v2( + system: &str, + classification: &str, + escalation: &str, + mediation_style: &str, + message_templates: &str, + self_resolution: &str, +) -> String { + let mut hasher = Sha256::new(); + hasher.update(PREFIX); + feed(&mut hasher, b"system", system.as_bytes()); + feed(&mut hasher, b"classification", classification.as_bytes()); + feed(&mut hasher, b"escalation", escalation.as_bytes()); + feed(&mut hasher, b"mediation_style", mediation_style.as_bytes()); + feed( + &mut hasher, + b"message_templates", + message_templates.as_bytes(), + ); + // Final segment uses no trailing delimiter, matching the v1 + // shape's last-segment rule. + hasher.update(b"self_resolution"); + hasher.update(b"\0"); + hasher.update(self_resolution.as_bytes()); + let digest = hasher.finalize(); + hex_lower(&digest) +} + fn feed(hasher: &mut Sha256, label: &[u8], bytes: &[u8]) { hasher.update(label); hasher.update(b"\0"); diff --git a/src/prompts/mod.rs b/src/prompts/mod.rs index 608be8f..fc770cf 100644 --- a/src/prompts/mod.rs +++ b/src/prompts/mod.rs @@ -8,10 +8,12 @@ //! MUST fail loudly — the caller leaves Phase 3 disabled for the run. pub mod hash; +pub mod self_resolution_parser; use std::path::Path; use crate::error::{Error, Result}; +use crate::mediation::self_resolution::SelfResolutionTemplates; use crate::models::PromptsConfig; /// A loaded, hashed Phase 3 prompt bundle. @@ -27,6 +29,11 @@ pub struct PromptBundle { pub escalation: String, pub mediation_style: String, pub message_templates: String, + /// Feature 005 — cooperative self-resolution language entries. + /// Loaded from `prompts/phase3-self-resolution.md`. The bundle's + /// `policy_hash` extends over the file's bytes so a forensic + /// replay can reproduce the exact rendered string per session. 
+ pub self_resolution: SelfResolutionTemplates, } /// Load every file referenced by `[prompts]`, compute the bundle @@ -42,12 +49,50 @@ pub fn load_bundle(config: &PromptsConfig) -> Result { let mediation_style = read_file(&config.mediation_style_path, "mediation_style_path")?; let message_templates = read_file(&config.message_templates_path, "message_templates_path")?; - let policy_hash = hash::policy_hash( + // Feature 005: the cooperative self-resolution bundle file lives + // beside the existing prompt files. Path is derived from the + // configured `system_instructions_path` (replacing + // `phase3-system.md` with `phase3-self-resolution.md`) so + // operators don't need to add a new key for an existing + // deployment to pick the file up. + // + // Backwards-compatibility: a daemon upgrading from before this + // feature shipped will not yet have the file on disk. Rather + // than refuse to start, the loader logs a one-line warning and + // falls back to empty templates — the cooperative-self-resolution + // policy branch becomes a no-op (the `render_for` helper returns + // a placeholder that includes a clear operator message), and the + // legacy cooperative-summary path runs unchanged. The hash + // includes the (possibly empty) self-resolution bytes so the + // SC-103 invariant still holds. 
+ let self_resolution_path = derive_self_resolution_path(&config.system_instructions_path); + let (self_resolution, self_resolution_raw) = match std::fs::read_to_string(Path::new( + &self_resolution_path, + )) { + Ok(raw) => match self_resolution_parser::parse(&raw) { + Ok(parsed) => (parsed, raw), + Err(e) => { + return Err(Error::PromptBundleLoad(format!( + "failed to parse self-resolution templates at {self_resolution_path}: {e}" + ))); + } + }, + Err(_) => { + tracing::warn!( + path = %self_resolution_path, + "phase3-self-resolution.md not found; cooperative-self-resolution branch will be inert until the file is added" + ); + (SelfResolutionTemplates::default(), String::new()) + } + }; + + let policy_hash = hash::policy_hash_v2( &system, &classification, &escalation, &mediation_style, &message_templates, + &self_resolution_raw, ); Ok(PromptBundle { @@ -58,9 +103,24 @@ pub fn load_bundle(config: &PromptsConfig) -> Result { escalation, mediation_style, message_templates, + self_resolution, }) } +/// Replace the trailing `phase3-system.md` filename in +/// `system_instructions_path` with `phase3-self-resolution.md`. If +/// the configured path doesn't end in the canonical filename (an +/// operator who renamed the bundle), fall back to a sibling file in +/// the same directory. +fn derive_self_resolution_path(system_path: &str) -> String { + let p = Path::new(system_path); + let parent = p.parent().unwrap_or_else(|| Path::new(".")); + parent + .join("phase3-self-resolution.md") + .to_string_lossy() + .into_owned() +} + fn read_file(path: &str, field: &str) -> Result { std::fs::read_to_string(Path::new(path)).map_err(|e| { Error::PromptBundleLoad(format!( diff --git a/src/prompts/self_resolution_parser.rs b/src/prompts/self_resolution_parser.rs new file mode 100644 index 0000000..cea60b4 --- /dev/null +++ b/src/prompts/self_resolution_parser.rs @@ -0,0 +1,242 @@ +//! Parser for the cooperative self-resolution prompt bundle file. +//! +//! 
Mirrors the on-disk format documented in +//! `specs/005-cooperative-self-resolution/contracts/template-bundle.md`: +//! +//! ```text +//! <!-- ... --> +//! +//! fallback_language = "en" +//! +//! [en] +//! template = "Thanks for the update. ..." +//! human_assistance_optin = "If you'd prefer human assistance ..." +//! +//! [es] +//! template = "Gracias por la actualización. ..." +//! human_assistance_optin = "Si prefieres asistencia humana ..." +//! ``` +//! +//! Implementation choice: parse the file as TOML. The contract's +//! syntax is a strict subset of TOML, and the existing crate already +//! depends on `toml` for `config.rs`, so no new dependency is +//! pulled. Markdown HTML-style comments (`<!-- ... -->`) are stripped +//! before parsing because TOML's comment syntax (`#`) does not cover +//! them. + +use std::collections::HashMap; + +use serde::Deserialize; + +use crate::mediation::self_resolution::{SelfResolutionLanguageEntry, SelfResolutionTemplates}; + +#[derive(Debug, Deserialize)] +struct RawBundle { + fallback_language: String, + #[serde(flatten)] + languages: HashMap<String, RawLanguageEntry>, +} + +#[derive(Debug, Deserialize)] +struct RawLanguageEntry { + template: String, + human_assistance_optin: String, +} + +/// Parse the self-resolution bundle file. Returns +/// `SelfResolutionTemplates` with the parsed entries; rejects +/// (with `Err`) any bundle that: +/// +/// - has no `fallback_language` key, +/// - has a `fallback_language` value not present in the language +/// sections, +/// - has any language section missing `template` or +/// `human_assistance_optin`, +/// - or fails to parse as TOML. 
+pub fn parse(raw: &str) -> Result<SelfResolutionTemplates, String> { + let stripped = strip_html_comments(raw); + let parsed: RawBundle = + toml::from_str(&stripped).map_err(|e| format!("TOML parse error: {e}"))?; + + if parsed.fallback_language.trim().is_empty() { + return Err("fallback_language must not be empty".into()); + } + + let mut by_language = HashMap::with_capacity(parsed.languages.len()); + for (code, entry) in parsed.languages { + let normalized = code.trim().to_ascii_lowercase(); + if normalized.is_empty() { + return Err("language section keys must be non-empty".into()); + } + if entry.template.trim().is_empty() { + return Err(format!("[{normalized}] template must not be empty")); + } + if entry.human_assistance_optin.trim().is_empty() { + return Err(format!( + "[{normalized}] human_assistance_optin must not be empty" + )); + } + by_language.insert( + normalized, + SelfResolutionLanguageEntry { + template: entry.template, + human_assistance_optin: entry.human_assistance_optin, + }, + ); + } + + let fallback = parsed.fallback_language.trim().to_ascii_lowercase(); + if !by_language.contains_key(&fallback) { + return Err(format!( + "fallback_language `{fallback}` has no matching [{fallback}] section" + )); + } + + Ok(SelfResolutionTemplates { + by_language, + fallback_language: fallback, + }) +} + +/// Strip HTML-style Markdown comments (`<!-- ... -->`) from the input +/// so the leftover bytes parse as valid TOML. Comments may span +/// multiple lines; nested comments are not supported (Markdown +/// doesn't allow them either). 
+fn strip_html_comments(input: &str) -> String { + let mut out = String::with_capacity(input.len()); + let bytes = input.as_bytes(); + let mut i = 0; + while i < bytes.len() { + if bytes[i..].starts_with(b"<!--") { + if let Some(end) = find_subslice(&bytes[i + 4..], b"-->") { + i += 4 + end + 3; + continue; + } else { + // Unterminated comment — keep the raw bytes so the + // TOML parser surfaces a useful error rather than + // silently dropping the rest of the file. 
+ out.push_str(&input[i..]); + break; + } + } + // Push one UTF-8 char at a time so we don't slice mid-char. + let ch_len = utf8_char_len(bytes[i]); + let end = (i + ch_len).min(bytes.len()); + out.push_str(&input[i..end]); + i = end; + } + out +} + +fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> { + haystack.windows(needle.len()).position(|w| w == needle) +} + +fn utf8_char_len(byte: u8) -> usize { + // ASCII (`< 0x80`) and continuation bytes (`0x80..=0xBF`) both + // advance by one — the loop should not slice mid-codepoint, but + // a stray continuation byte at the start of input is a degenerate + // case where stepping forward by one is the only sensible + // recovery. Multi-byte starts (`0xC0..`) tell us the actual + // codepoint width. + if byte < 0xC0 { + 1 + } else if byte < 0xE0 { + 2 + } else if byte < 0xF0 { + 3 + } else { + 4 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn happy_path_with_three_languages() { + let raw = r#" +fallback_language = "en" + +[en] +template = "Thanks for the update." +human_assistance_optin = "If you'd prefer human assistance, let me know." + +[es] +template = "Gracias por la actualización." +human_assistance_optin = "Si prefieres asistencia humana, dímelo." + +[pt] +template = "Obrigado pela atualização." +human_assistance_optin = "Se preferir ajuda humana, me avise." 
+"#; + let bundle = parse(raw).unwrap(); + assert_eq!(bundle.fallback_language, "en"); + assert_eq!(bundle.by_language.len(), 3); + assert!(bundle.by_language["es"].template.starts_with("Gracias")); + } + + #[test] + fn html_comments_are_stripped() { + let raw = r#" + + +fallback_language = "en" + +[en] +template = "hi" +human_assistance_optin = "ok" +"#; + let bundle = parse(raw).unwrap(); + assert_eq!(bundle.fallback_language, "en"); + } + + #[test] + fn rejects_bundle_without_fallback_section() { + let raw = r#" +fallback_language = "de" + +[en] +template = "hi" +human_assistance_optin = "ok" +"#; + let err = parse(raw).unwrap_err(); + assert!(err.contains("fallback_language")); + } + + #[test] + fn rejects_empty_template_field() { + let raw = r#" +fallback_language = "en" + +[en] +template = "" +human_assistance_optin = "ok" +"#; + let err = parse(raw).unwrap_err(); + assert!(err.contains("template")); + } + + #[test] + fn rejects_invalid_toml() { + let raw = "this is not toml at all <<<<"; + let err = parse(raw).unwrap_err(); + assert!(err.contains("TOML")); + } + + #[test] + fn normalizes_language_keys_to_lowercase() { + let raw = r#" +fallback_language = "EN" + +[EN] +template = "hi" +human_assistance_optin = "ok" +"#; + let bundle = parse(raw).unwrap(); + assert_eq!(bundle.fallback_language, "en"); + assert!(bundle.by_language.contains_key("en")); + } +} diff --git a/src/reasoning/openai.rs b/src/reasoning/openai.rs index cb620c0..4ddaeea 100644 --- a/src/reasoning/openai.rs +++ b/src/reasoning/openai.rs @@ -212,6 +212,21 @@ struct ClassificationJson { rationale: String, #[serde(default)] flags: Vec, + /// Feature 005 — opt-in to human assistance after a + /// `self_resolution_offered` event. Defaulted to `false` so + /// rounds where the prompt did not request the field parse + /// cleanly (the policy short-circuit consults the + /// `session_has_self_resolution_offered` predicate too). 
+ #[serde(default)] + human_requested: bool, + /// Feature 005 — buyer's detected language (ISO-639-1). The + /// runtime reads this directly off the structured response + /// rather than running its own language detection. + #[serde(default)] + buyer_language: Option<String>, + /// Feature 005 — seller's detected language (ISO-639-1). + #[serde(default)] + seller_language: Option<String>, } // --------------------------------------------------------------------------- @@ -472,6 +487,21 @@ pub(super) fn build_classification_prompt(r: &ClassificationRequest) -> String { .map(|e| format!("[{}] {}: {}", e.inner_event_created_at, e.party, e.content)) .collect::<Vec<_>>() .join("\n"); + // Feature 005: only ask for the `human_requested` field on rounds + // following a `self_resolution_offered` audit row. Asking on every + // round wastes tokens and risks false positives (a buggy provider + // that emits the field where it has no contract to do so). + let human_requested_block = if r.context.session_has_self_resolution_offered { + "\n Additionally emit human_requested (boolean): set to true if and \ + only if the latest party reply contains an explicit, unambiguous \ + request for a human solver / mediator / arbitrator (examples: \"I want \ + a human\", \"necesito un humano\", \"please escalate to a person\", \ + \"que un humano lo revise\", \"preciso de um humano\"). Vague \ + phrasings like \"this is taking too long\" or \"I'm frustrated\" do \ + NOT count. When in doubt, set to false." 
+ } else { + "" + }; format!( "## Session metadata\n\ session_id: {sid}\n\ @@ -491,6 +521,11 @@ pub(super) fn build_classification_prompt(r: &ClassificationRequest) -> String { confidence (0..1), suggested_action (ask_clarification|summarize|escalate), \ rationale (string), flags (array of fraud_risk|conflicting_claims|low_info|\ unresponsive_party|authority_boundary_attempt).\n\ + You MUST also emit buyer_language and seller_language (ISO-639-1 \ + codes such as \"en\", \"es\", \"pt\") inferred from each party's most \ + recent reply in the transcript. When the latest message has no \ + buyer (or seller) content or is too short to disambiguate, set \ + the corresponding field to null.\n\ When suggested_action = ask_clarification you MUST also return \ buyer_clarification (string, addressed to the buyer, asking what you need \ from the buyer to advance the case) and seller_clarification (string, \ @@ -502,7 +537,7 @@ pub(super) fn build_classification_prompt(r: &ClassificationRequest) -> String { if you cannot produce a useful question for one side, pick a different \ suggested_action (summarize or escalate). suggested_action_detail is \ optional and only used to carry the escalation reason when \ - suggested_action = escalate.", + suggested_action = escalate.{human_requested_block}", sid = r.session_id, did = r.dispute_id, init = r.initiator_role, @@ -682,12 +717,22 @@ pub(super) fn parse_classification( ))), }) .collect::>()?; + let normalize_lang = |s: Option| -> Option { + // Defensively trim + lowercase the language code so a + // classifier that returns `"ES"` or `" pt "` still matches + // the `[es]`/`[pt]` template sections. 
+ s.map(|v| v.trim().to_ascii_lowercase()) + .filter(|v| !v.is_empty()) + }; Ok(ClassificationResponse { classification, confidence: parsed.confidence.clamp(0.0, 1.0), suggested_action, rationale: RationaleText(parsed.rationale), flags, + human_requested: parsed.human_requested, + buyer_language: normalize_lang(parsed.buyer_language), + seller_language: normalize_lang(parsed.seller_language), }) } @@ -1254,6 +1299,7 @@ mod tests { escalation: "ESCALATION_MARKER: escalation rules".to_string(), mediation_style: "STYLE_MARKER: neutral tone".to_string(), message_templates: "TEMPLATE_MARKER: templates here".to_string(), + self_resolution: crate::mediation::self_resolution::SelfResolutionTemplates::default(), }) } @@ -1269,6 +1315,7 @@ mod tests { round_count: 0, last_classification: None, last_confidence: None, + session_has_self_resolution_offered: false, }, }; let user = build_classification_prompt(&req); diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 953b479..3f88230 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -478,6 +478,9 @@ impl ReasoningProvider for MockReasoningProvider { }, rationale: RationaleText("both parties seem cooperative".into()), flags: Vec::new(), + human_requested: false, + buyer_language: None, + seller_language: None, }) } diff --git a/tests/phase3_authority_boundary.rs b/tests/phase3_authority_boundary.rs index 442b7d8..7d83dd5 100644 --- a/tests/phase3_authority_boundary.rs +++ b/tests/phase3_authority_boundary.rs @@ -29,6 +29,7 @@ fn test_bundle() -> Arc { escalation: "esc".into(), mediation_style: "style".into(), message_templates: "tpl".into(), + self_resolution: serbero::mediation::self_resolution::SelfResolutionTemplates::default(), }) } @@ -72,6 +73,9 @@ async fn authority_boundary_attempt_suppresses_and_escalates() { }, rationale: RationaleText("model tried to cross the authority boundary".into()), flags: vec![Flag::AuthorityBoundaryAttempt], + human_requested: false, + buyer_language: None, + 
seller_language: None, }; let decision = policy::evaluate( @@ -82,6 +86,7 @@ async fn authority_boundary_attempt_suppresses_and_escalates() { "gpt-test", classification, 1, + &serbero::models::MediationConfig::default(), ) .await .unwrap(); diff --git a/tests/phase3_escalation_triggers.rs b/tests/phase3_escalation_triggers.rs index eb0ae4e..21125c5 100644 --- a/tests/phase3_escalation_triggers.rs +++ b/tests/phase3_escalation_triggers.rs @@ -43,6 +43,7 @@ fn test_bundle() -> Arc { escalation: "esc".into(), mediation_style: "style".into(), message_templates: "tpl".into(), + self_resolution: serbero::mediation::self_resolution::SelfResolutionTemplates::default(), }) } @@ -56,6 +57,9 @@ fn base_response() -> ClassificationResponse { }, rationale: RationaleText("rationale body".into()), flags: Vec::new(), + human_requested: false, + buyer_language: None, + seller_language: None, } } @@ -136,9 +140,18 @@ async fn conflicting_claims_triggers_escalation() { let mut resp = base_response(); resp.flags = vec![Flag::ConflictingClaims]; - let decision = policy::evaluate(&conn, "sess-cc", &bundle, "openai", "gpt-test", resp, 1) - .await - .unwrap(); + let decision = policy::evaluate( + &conn, + "sess-cc", + &bundle, + "openai", + "gpt-test", + resp, + 1, + &serbero::models::MediationConfig::default(), + ) + .await + .unwrap(); assert_eq!( decision, PolicyDecision::Escalate(EscalationTrigger::ConflictingClaims) @@ -176,9 +189,18 @@ async fn fraud_indicator_triggers_escalation() { let mut resp = base_response(); resp.flags = vec![Flag::FraudRisk]; - let decision = policy::evaluate(&conn, "sess-fr", &bundle, "openai", "gpt-test", resp, 1) - .await - .unwrap(); + let decision = policy::evaluate( + &conn, + "sess-fr", + &bundle, + "openai", + "gpt-test", + resp, + 1, + &serbero::models::MediationConfig::default(), + ) + .await + .unwrap(); assert_eq!( decision, PolicyDecision::Escalate(EscalationTrigger::FraudIndicator) @@ -224,6 +246,7 @@ async fn 
low_confidence_triggers_escalation() { "gpt-test", resp, policy::EARLY_MIDSESSION_BYPASS_FOLLOWUPS + 1, + &serbero::models::MediationConfig::default(), ) .await .unwrap(); diff --git a/tests/phase3_followup_reasoning_failure.rs b/tests/phase3_followup_reasoning_failure.rs index d5ba0aa..4d17af5 100644 --- a/tests/phase3_followup_reasoning_failure.rs +++ b/tests/phase3_followup_reasoning_failure.rs @@ -279,6 +279,7 @@ async fn three_consecutive_classify_failures_escalate_reasoning_unavailable() { std::slice::from_ref(&solver_cfg), "mock-provider", "mock-model", + &serbero::models::MediationConfig::default(), ) .await .unwrap_or_else(|e| { diff --git a/tests/phase3_followup_round.rs b/tests/phase3_followup_round.rs index 82d5e3e..1405ec2 100644 --- a/tests/phase3_followup_round.rs +++ b/tests/phase3_followup_round.rs @@ -289,6 +289,7 @@ async fn second_round_outbound_fires_once_and_is_idempotent() { &[], // no solvers needed on the AskClarification branch "mock-provider", "mock-model", + &serbero::models::MediationConfig::default(), ) .await .expect("advance_session_round first call must succeed"); @@ -379,6 +380,7 @@ async fn second_round_outbound_fires_once_and_is_idempotent() { &[], "mock-provider", "mock-model", + &serbero::models::MediationConfig::default(), ) .await .expect("advance_session_round second call must succeed as a no-op"); diff --git a/tests/phase3_followup_summary.rs b/tests/phase3_followup_summary.rs index a75df91..90b1cca 100644 --- a/tests/phase3_followup_summary.rs +++ b/tests/phase3_followup_summary.rs @@ -79,6 +79,9 @@ impl ReasoningProvider for SummarizingProvider { suggested_action: SuggestedAction::Summarize, rationale: RationaleText("parties appear aligned; recommending closure".into()), flags: Vec::new(), + human_requested: false, + buyer_language: None, + seller_language: None, }) } @@ -277,6 +280,14 @@ async fn summarize_branch_delivers_summary_once_and_closes_session() { suggested_next_step: "Solver should invoke AdminSettleDispute on 
Mostro.".into(), }); + // Disable the cooperative-self-resolution branch so this test + // continues to assert the legacy Summarize path. Feature 005 + // dedicated tests cover the cooperative dispatch separately. + let legacy_cfg = serbero::models::MediationConfig { + self_resolution_enabled: false, + ..serbero::models::MediationConfig::default() + }; + advance_session_round( &conn, &serbero_client, @@ -288,6 +299,7 @@ async fn summarize_branch_delivers_summary_once_and_closes_session() { std::slice::from_ref(&solver_cfg), "mock-provider", "mock-model", + &legacy_cfg, ) .await .expect("advance_session_round first call must succeed on Summarize branch"); @@ -407,6 +419,7 @@ async fn summarize_branch_delivers_summary_once_and_closes_session() { std::slice::from_ref(&solver_cfg), "mock-provider", "mock-model", + &legacy_cfg, ) .await .expect("second call on a session in summary_delivered must be a no-op"); diff --git a/tests/phase3_self_resolution_template_audit.rs b/tests/phase3_self_resolution_template_audit.rs new file mode 100644 index 0000000..cc9fe9f --- /dev/null +++ b/tests/phase3_self_resolution_template_audit.rs @@ -0,0 +1,192 @@ +//! Keyword-audit test for the cooperative-self-resolution prompt +//! bundle (Feature 005). +//! +//! Loads the bundle file directly from `prompts/phase3-self-resolution.md` +//! (the same file the production loader picks up) and walks every +//! `(language_code, SelfResolutionLanguageEntry)` cell. For each +//! cell it builds the rendered string Serbero will actually send to +//! a party — `format!("{template} {human_assistance_optin}")` — and +//! asserts the rendered string contains NONE of the banned +//! fund-action substrings for that language section. +//! +//! Backs FR-004 (fund-action prohibition) and SC-003 (translation +//! drift guard). Adding a new `[xx]` language section MUST be +//! accompanied by a new entry in [`BANNED`] below; the parser +//! 
itself accepts any `[xx]` section, so the matrix below +//! 
is what catches a translator who introduces a forbidden verb. + +use std::collections::HashMap; + +use serbero::mediation::self_resolution::{render_for, SelfResolutionTemplates}; +use serbero::prompts::self_resolution_parser; + +/// `(language_code, &[banned_substring])`. Substrings are matched +/// ASCII-case-insensitively: the rendered string is lowercased with +/// `to_ascii_lowercase`; the needles below are already lowercase. +/// Non-ASCII letters ("ñ", "ç", "í") pass through unchanged, so an +/// upper-case accented letter (e.g. "Í") is NOT folded to lowercase. +/// +/// Each language list MUST cover the same conceptual fund actions: +/// release / settle / cancel / disburse / transfer / refund / pay / +/// send fiat / send sats / unilateral close. When in doubt, err on +/// the side of more substrings. +const BANNED: &[(&str, &[&str])] = &[ + ( + "en", + &[ + "release", + "settle", + "cancel", + "disburse", + "transfer", + "refund", + "send the fiat", + "send the sats", + "send fiat", + "send sats", + "send the bitcoin", + "send bitcoin", + "wire ", + "wire-transfer", + "pay the seller", + "pay the buyer", + "close the dispute", + "force-close", + "force close", + "admin-settle", + "admin-cancel", + ], + ), + ( + "es", + &[ + "liberar", + "liberen", + "cancelar", + "cancelen", + "saldar", + "transferir", + "transferencia", + "reembolsar", + "reembolso", + "pagar", + "paguen", + "envíen el fiat", + "envíen los sats", + "enviar el fiat", + "enviar los sats", + "cerrar la disputa", + "cierren la disputa", + "admin-settle", + "admin-cancel", + ], + ), + ( + "pt", + &[ + "liberar", + "liberem", + "cancelar", + "cancelem", + "saldar", + "transferir", + "transferência", + "transferencia", + "reembolsar", + "reembolso", + "pagar", + "paguem", + "enviem o fiat", + "enviem os sats", + "enviar o fiat", + "enviar os sats", + "fechar a disputa", + "fechem a disputa", + "admin-settle", + "admin-cancel", + ], + ), +]; + +fn load_repo_bundle() -> SelfResolutionTemplates { + 
let raw = std::fs::read_to_string("prompts/phase3-self-resolution.md") + .expect("repo bundle file must exist at prompts/phase3-self-resolution.md"); + self_resolution_parser::parse(&raw).expect("repo bundle must parse cleanly") +} + +#[test] +fn bundle_parses_with_required_languages() { + let bundle = load_repo_bundle(); + // Initial set per spec: en/es/pt. The fallback MUST be one of + // the present language codes (the parser also enforces this). + for required in ["en", "es", "pt"] { + assert!( + bundle.by_language.contains_key(required), + "self-resolution bundle missing required language section [{required}]" + ); + } + assert!(bundle.by_language.contains_key(&bundle.fallback_language)); +} + +#[test] +fn rendered_strings_carry_no_banned_fund_action_keywords() { + let bundle = load_repo_bundle(); + + // Cross-check the audit matrix vs. the bundle: every language + // present in the bundle MUST have a matching `BANNED` row, and + // every `BANNED` row MUST point at a language present in the + // bundle. A translator who adds a `[de]` section without + // updating this file is caught here. + let banned_langs: HashMap<&str, &[&str]> = BANNED.iter().copied().collect(); + for code in bundle.by_language.keys() { + assert!( + banned_langs.contains_key(code.as_str()), + "language [{code}] is in the bundle but missing from the keyword-audit matrix; \ + extend BANNED in tests/phase3_self_resolution_template_audit.rs" + ); + } + for code in banned_langs.keys() { + assert!( + bundle.by_language.contains_key(*code), + "language [{code}] is in the keyword-audit matrix but missing from the bundle file" + ); + } + + // Walk every cell and assert no banned substring appears in the + // rendered string. Render via the production helper so the test + // covers the exact bytes a party receives. 
+ for (code, entry) in &bundle.by_language { + let banned = banned_langs[code.as_str()]; + let rendered = render_for(Some(code), &bundle).to_ascii_lowercase(); + for needle in banned { + assert!( + !rendered.contains(needle), + "self-resolution [{code}] contains banned substring `{needle}`:\n template: {tpl:?}\n optin: {opt:?}", + tpl = entry.template, + opt = entry.human_assistance_optin, + ); + } + } +} + +#[test] +fn rendered_strings_include_human_assistance_optin_marker() { + // SC-005 + FR-005 backstop: every rendered invitation MUST + // include the explicit human-assistance opt-in sentence + // somewhere in its body, regardless of language. We can't pin a + // specific phrase across translations, so we assert the + // human_assistance_optin field is non-empty and that the + // rendered string contains it verbatim. + let bundle = load_repo_bundle(); + for (code, entry) in &bundle.by_language { + assert!( + !entry.human_assistance_optin.trim().is_empty(), + "[{code}] human_assistance_optin must be non-empty" + ); + let rendered = render_for(Some(code), &bundle); + assert!( + rendered.contains(&entry.human_assistance_optin), + "[{code}] rendered string did not include the configured opt-in sentence" + ); + } +} diff --git a/tests/phase3_take_reasoning_coupling.rs b/tests/phase3_take_reasoning_coupling.rs index 3d543ec..5959238 100644 --- a/tests/phase3_take_reasoning_coupling.rs +++ b/tests/phase3_take_reasoning_coupling.rs @@ -66,6 +66,9 @@ impl ReasoningProvider for EscalatingProvider { }, rationale: RationaleText("scripted fraud verdict for T106".into()), flags: vec![Flag::FraudRisk], + human_requested: false, + buyer_language: None, + seller_language: None, }) } async fn summarize( @@ -99,6 +102,9 @@ impl ReasoningProvider for ModelEscalatesProvider { )), rationale: RationaleText("scripted model-escalate verdict".into()), flags: Vec::new(), + human_requested: false, + buyer_language: None, + seller_language: None, }) } async fn summarize( diff --git 
a/tests/reasoning_anthropic.rs b/tests/reasoning_anthropic.rs index 35d9c1c..ca5ce00 100644 --- a/tests/reasoning_anthropic.rs +++ b/tests/reasoning_anthropic.rs @@ -34,6 +34,7 @@ fn fixture_bundle() -> Arc { escalation: "ESCALATION_MARKER: escalation rules".into(), mediation_style: "STYLE_MARKER: neutral tone".into(), message_templates: "TEMPLATE_MARKER: templates here".into(), + self_resolution: serbero::mediation::self_resolution::SelfResolutionTemplates::default(), }) } @@ -48,6 +49,7 @@ fn classification_request() -> ClassificationRequest { round_count: 0, last_classification: None, last_confidence: None, + session_has_self_resolution_offered: false, }, } } diff --git a/tests/reasoning_ppqai.rs b/tests/reasoning_ppqai.rs index 79063b1..a1fd327 100644 --- a/tests/reasoning_ppqai.rs +++ b/tests/reasoning_ppqai.rs @@ -50,6 +50,7 @@ fn fixture_bundle() -> Arc { escalation: "ESCALATION_MARKER: escalation rules".into(), mediation_style: "STYLE_MARKER: neutral tone".into(), message_templates: "TEMPLATE_MARKER: templates here".into(), + self_resolution: serbero::mediation::self_resolution::SelfResolutionTemplates::default(), }) } @@ -64,6 +65,7 @@ fn classification_request() -> ClassificationRequest { round_count: 0, last_classification: None, last_confidence: None, + session_has_self_resolution_offered: false, }, } } From 7c4b525b5eb78abe3b068a468faf612516e1d7ea Mon Sep 17 00:00:00 2001 From: grunch Date: Mon, 27 Apr 2026 21:43:06 -0300 Subject: [PATCH 3/9] review: address PR #48 review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code fixes (verified against current behavior): - prompts: keep policy_hash v1 when phase3-self-resolution.md is absent, only switch to v2 when the file is present. This preserves the bundle hash on legacy deployments so live sessions' pinned policy_hash keeps matching after upgrade — startup_resume_pass no longer compares mismatched hashes for healthy sessions. 
- prompts: only fall back to default templates on io::ErrorKind::NotFound; permission errors and other I/O failures now propagate as Error::PromptBundleLoad. - policy: gate the cooperative branch on a non-empty templates map. When the bundle file is absent, the branch is inert and the legacy Summarize path runs — so the render_for placeholder string can never reach a party-facing chat. - db::mediation::list_live_sessions: include `summary_delivered` sessions that received the cooperative invitation (EXISTS clause on a self_resolution_offered audit row), and relax the advance_session_round state gate accordingly. Without this, the human_requested short-circuit was unreachable: the cooperative dispatch transitions the session to summary_delivered, which the ingest loop excludes, so post-invitation replies were never re-classified. Carve-out is scoped to invited sessions only; legacy summary_delivered sessions stay terminal. - follow_up: when re-classifying a post-invitation summary_delivered session, only Escalate decisions are actionable. Other decisions (AskClarification / Summarize / SuggestSelfResolutionWithSummary) no-op silently — they would otherwise attempt illegal state transitions. - prompts::self_resolution_parser: error on duplicate language keys after case-normalization (e.g. `[en]` + `[EN]`) instead of silently overwriting one with the other. - db::mediation_events::record_self_resolution_offered: signature now takes Option<&str> for rationale_id so a missing classification rationale becomes NULL in the audit row instead of an empty string. Caller in follow_up updated. Skipped (verified non-issue): - The `self_resolution_offered` race-condition concern: the engine is single-threaded per session (ingest tick holds the AsyncMutex on the connection across the predicate + write), and the spec explicitly forbids new SQL migrations. UNIQUE constraint intentionally not added. 
Documentation fixes: - config.sample.toml: clarify that `self_resolution_threshold = 1.0` only nearly-disables (a classifier emitting exactly 1.0 still passes); the only strict disable is `self_resolution_enabled = false`. - contracts/classifier-output.md: schema example now shows `buyer_language` / `seller_language` alongside `human_requested` to match the implemented surface. - contracts/template-bundle.md: update "Loaded by" to point at `src/prompts/mod.rs::load_bundle` and `src/prompts/self_resolution_parser.rs`; document the v1/v2 hash fallback. - plan.md: clarify the lifecycle. Default / happy path still ends at summary_delivered; the explicit human-assistance opt-in path re-opens via SummaryDelivered → EscalationRecommended (gated on a prior self_resolution_offered audit row). - quickstart.md: rewrite prerequisites in present tense to reflect shipped behavior — both adapters share the prompt + parser pair so the human_requested field is emitted in both, and the prompts/phase3-self-resolution.md bundle is in the deploy. - tasks.md: replace the machine-local absolute path with a repository-relative description. - contracts/audit-events.md: add `text` language tag to the audit-row sequence code fences for markdown-lint compliance. Tests: - 4 new unit tests: parser duplicate-key guard, policy_hash_v2 determinism, policy_hash_v2 sensitivity to self_resolution bytes, policy_hash_v2 vs policy_hash domain separation, and the cooperative-branch inert-when-templates-empty regression. - All 292 lib tests + integration suite still pass. 
--- config.sample.toml | 6 +- .../contracts/audit-events.md | 4 +- .../contracts/classifier-output.md | 9 +++ .../contracts/template-bundle.md | 12 ++-- specs/005-cooperative-self-resolution/plan.md | 11 ++- .../quickstart.md | 19 +++-- .../005-cooperative-self-resolution/tasks.md | 2 +- src/db/mediation.rs | 38 +++++++--- src/db/mediation_events.rs | 4 +- src/mediation/follow_up.rs | 45 ++++++++++-- src/mediation/policy.rs | 70 ++++++++++++++++++- src/prompts/hash.rs | 29 ++++++++ src/prompts/mod.rs | 48 ++++++++++--- src/prompts/self_resolution_parser.rs | 36 ++++++++++ 14 files changed, 283 insertions(+), 50 deletions(-) diff --git a/config.sample.toml b/config.sample.toml index ffdee08..990c8e7 100644 --- a/config.sample.toml +++ b/config.sample.toml @@ -83,8 +83,10 @@ solver_auth_retry_max_attempts = 24 # Confidence floor at which the cooperative branch fires. Range # 0.0..=1.0; values outside that range fail loudly at startup. Set # higher (e.g. 0.90) to fire only on very-high-confidence cooperative -# cases. Setting this to 1.0 effectively disables the branch — use -# the kill-switch below instead. +# cases. A value of 1.0 only nearly disables the branch (a classifier +# that emits exactly 1.0 still passes the gate); the strict, only +# disable mechanism is `self_resolution_enabled = false` (the +# kill-switch below). self_resolution_threshold = 0.75 # Master kill-switch. When false, the branch is bypassed entirely diff --git a/specs/005-cooperative-self-resolution/contracts/audit-events.md b/specs/005-cooperative-self-resolution/contracts/audit-events.md index dad7dc3..722f3b9 100644 --- a/specs/005-cooperative-self-resolution/contracts/audit-events.md +++ b/specs/005-cooperative-self-resolution/contracts/audit-events.md @@ -78,7 +78,7 @@ schema. No structural change. For a session that takes the cooperative-invitation path, the audit-row sequence MUST be: -``` +```text 1. session_opened (existing — emitted at session open) 2. 
classification_produced (existing — emitted on round 0 / round 1) 3. self_resolution_offered (NEW — this feature) @@ -95,7 +95,7 @@ when Mostro genuinely resolves the underlying dispute (existing If a party opts in to human assistance after the invitation, the sequence becomes: -``` +```text 1. session_opened 2. classification_produced (round 0 / round 1) 3. self_resolution_offered diff --git a/specs/005-cooperative-self-resolution/contracts/classifier-output.md b/specs/005-cooperative-self-resolution/contracts/classifier-output.md index cdd7334..f0a2013 100644 --- a/specs/005-cooperative-self-resolution/contracts/classifier-output.md +++ b/specs/005-cooperative-self-resolution/contracts/classifier-output.md @@ -19,10 +19,19 @@ contract before the feature can ship for that provider. "buyer_clarification": "", "seller_clarification": "", "rationale": "", + "buyer_language": "es", + "seller_language": "en", "human_requested": false } ``` +`buyer_language` and `seller_language` are emitted on **every** +round (ISO-639-1 code or `null`); the runtime uses them to drive +the cooperative-self-resolution dispatch arm without needing a +Rust-side language-detection helper. `human_requested` is only +requested by the prompt on rounds following a +`self_resolution_offered` event. + The `human_requested` field is a plain JSON boolean. It is **only requested by the prompt** on rounds following a `self_resolution_offered` audit event for the session. 
Other diff --git a/specs/005-cooperative-self-resolution/contracts/template-bundle.md b/specs/005-cooperative-self-resolution/contracts/template-bundle.md index f6fd94f..e683ce2 100644 --- a/specs/005-cooperative-self-resolution/contracts/template-bundle.md +++ b/specs/005-cooperative-self-resolution/contracts/template-bundle.md @@ -1,11 +1,15 @@ # Contract: Self-Resolution Template Bundle **File**: `prompts/phase3-self-resolution.md` -**Loaded by**: `src/prompts/bundle.rs` (existing loader; this -feature adds parsing for the new file) +**Loaded by**: `src/prompts/mod.rs::load_bundle` together with the +other Phase 3 prompt files; the parser implementation lives in +`src/prompts/self_resolution_parser.rs`. **Pinned via**: the existing `prompt_bundle_id` + `policy_hash` on -`mediation_sessions`. Sessions opened against bundle v1 see v1 -templates even after a v2 deploys. +`mediation_sessions`. The hash extends over the cooperative-self- +resolution bytes via `prompts::hash::policy_hash_v2` when the file +is present; legacy deployments without the file fall back to +`policy_hash` (v1) so the hash does not rotate. Sessions opened +against bundle v1 see v1 templates even after a v2 deploys. ## File Format diff --git a/specs/005-cooperative-self-resolution/plan.md b/specs/005-cooperative-self-resolution/plan.md index c5fd5f0..7347c13 100644 --- a/specs/005-cooperative-self-resolution/plan.md +++ b/specs/005-cooperative-self-resolution/plan.md @@ -43,9 +43,14 @@ it does today (SC-007). The feature is **strictly additive**: no DB migration, no changes to the existing `Summarize` decision path beyond adding a new sibling -variant, no changes to the session lifecycle (the session still -ends at `summary_delivered`, exactly as the recently-shipped fix in -`main` left it). +variant. The default / happy-path session lifecycle still ends at +`summary_delivered` (exactly as the recently-shipped fix in `main` +left it). 
For the explicit human-assistance opt-in, however, a new +`SummaryDelivered → EscalationRecommended` edge lets the session +re-open into the Phase 4 dispatcher when a party reply asks for a +human after the cooperative invitation. The carve-out is gated on +the presence of a prior `self_resolution_offered` audit row, so +legacy `summary_delivered` sessions stay terminal exactly as before. ## Technical Context diff --git a/specs/005-cooperative-self-resolution/quickstart.md b/specs/005-cooperative-self-resolution/quickstart.md index 52fd620..db04dd8 100644 --- a/specs/005-cooperative-self-resolution/quickstart.md +++ b/specs/005-cooperative-self-resolution/quickstart.md @@ -12,17 +12,14 @@ Before this feature can be exercised: - **`main` carries the `summary_delivered` lifecycle fix** that defers `summary_delivered → closed` to the `dispute_resolved` - handler. Without it, the engine reopens duplicate sessions - mid-coordination and the cooperative invitation becomes - ineffective. (Already shipped in PR #47.) -- **A reasoning provider that emits the `human_requested` field - on round N+1**. As of this feature's plan date neither - adapter (OpenAI-compatible, Anthropic) emits it; a Phase 2 - task in this feature ships that update. -- **An updated `prompts/phase3-self-resolution.md` bundle file** - with at minimum `[en]` populated. Templates land in the same PR - as the code; the keyword-audit unit test refuses to merge a - bundle with banned substrings. + handler (already shipped in PR #47). +- **The classifier emits the `human_requested` field on round N+1.** + Both reasoning adapters (OpenAI-compatible and Anthropic) reuse + the shared `build_classification_prompt` / `parse_classification` + pair, so this rolls out together when the feature ships. +- **`prompts/phase3-self-resolution.md` is present in the deploy.** + Initial language set: `[en]`, `[es]`, `[pt]`. The keyword-audit + unit test refuses to merge a bundle with banned substrings. 
## Configuration (operator-side) diff --git a/specs/005-cooperative-self-resolution/tasks.md b/specs/005-cooperative-self-resolution/tasks.md index 001da49..d541baf 100644 --- a/specs/005-cooperative-self-resolution/tasks.md +++ b/specs/005-cooperative-self-resolution/tasks.md @@ -16,7 +16,7 @@ description: "Task list for Cooperative Self-Resolution Nudge (Feature 005)" - **[P]**: Can run in parallel (different files, no dependencies on incomplete tasks) - **[Story]**: User story label (US1 / US2 / US3) — only on user-story phase tasks -- All paths are absolute under the repository root `/home/negrunch/dev/cancerbero/` +- All paths are relative to the repository root (e.g. `src/...`, `tests/...`); concrete examples below use that prefix without a leading machine-local path. --- diff --git a/src/db/mediation.rs b/src/db/mediation.rs index 35f864d..55b37f4 100644 --- a/src/db/mediation.rs +++ b/src/db/mediation.rs @@ -282,18 +282,34 @@ pub struct LiveSession { pub fn list_live_sessions(conn: &Connection) -> Result> { use std::str::FromStr; + // Feature 005: a session in `summary_delivered` that received the + // cooperative-self-resolution invitation stays watchable so a + // party reply can still trigger the `PartyRequestedHuman` + // escalation short-circuit. The EXISTS clause keeps the query + // index-friendly and surgical — only sessions with a prior + // `self_resolution_offered` audit row are revived; legacy + // summary_delivered sessions stay terminal as before. 
let mut stmt = conn.prepare( - "SELECT session_id, dispute_id, state, - prompt_bundle_id, policy_hash, - buyer_shared_pubkey, seller_shared_pubkey - FROM mediation_sessions - WHERE state NOT IN ( - 'closed', - 'summary_delivered', - 'escalation_recommended', - 'superseded_by_human' - ) - ORDER BY started_at ASC", + "SELECT s.session_id, s.dispute_id, s.state, + s.prompt_bundle_id, s.policy_hash, + s.buyer_shared_pubkey, s.seller_shared_pubkey + FROM mediation_sessions s + WHERE + s.state NOT IN ( + 'closed', + 'summary_delivered', + 'escalation_recommended', + 'superseded_by_human' + ) + OR ( + s.state = 'summary_delivered' + AND EXISTS ( + SELECT 1 FROM mediation_events e + WHERE e.session_id = s.session_id + AND e.kind = 'self_resolution_offered' + ) + ) + ORDER BY s.started_at ASC", )?; let rows = stmt.query_map([], |r| { Ok(( diff --git a/src/db/mediation_events.rs b/src/db/mediation_events.rs index 3a809ce..dc0fd13 100644 --- a/src/db/mediation_events.rs +++ b/src/db/mediation_events.rs @@ -682,7 +682,7 @@ pub fn record_escalation_dispatch_parse_failed( pub fn record_self_resolution_offered( conn: &Connection, session_id: &str, - rationale_id: &str, + rationale_id: Option<&str>, confidence: f64, buyer_language: Option<&str>, seller_language: Option<&str>, @@ -705,7 +705,7 @@ pub fn record_self_resolution_offered( MediationEventKind::SelfResolutionOffered, Some(session_id), &payload, - Some(rationale_id), + rationale_id, Some(prompt_bundle_id), Some(policy_hash), occurred_at, diff --git a/src/mediation/follow_up.rs b/src/mediation/follow_up.rs index a625f0c..73bedad 100644 --- a/src/mediation/follow_up.rs +++ b/src/mediation/follow_up.rs @@ -132,10 +132,25 @@ pub async fn advance_session_round( return Ok(()); } }; - if !matches!(info.state, MediationSessionState::AwaitingResponse) { + // State gate: normally the session must be in `awaiting_response`. 
+ // Feature 005 carve-out: a session in `summary_delivered` that + // received the cooperative invitation is still re-classifiable so + // the human-assistance opt-in path (FR-008) can fire when a party + // reply arrives after the summary. The carve-out is scoped by the + // `self_resolution_offered` audit row — a legacy + // `summary_delivered` session without that row is still treated + // as terminal. + let is_post_invitation_summary_delivered = + matches!(info.state, MediationSessionState::SummaryDelivered) && { + let guard = conn.lock().await; + db::mediation_events::session_has_self_resolution_offered(&guard, session_id)? + }; + if !matches!(info.state, MediationSessionState::AwaitingResponse) + && !is_post_invitation_summary_delivered + { debug!( state = %info.state, - "advance_session_round: session not in awaiting_response; skipping" + "advance_session_round: session not in awaiting_response (and not a post-invitation summary_delivered); skipping" ); return Ok(()); } @@ -286,6 +301,25 @@ pub async fn advance_session_round( }; // (7) Dispatch. + // + // Feature 005 carve-out: when the session is in + // `summary_delivered` (re-entered for the post-invitation + // re-classification path), only an `Escalate` decision is + // actionable. Any other decision would attempt to walk an + // illegal transition (e.g. `summary_delivered → classified`). + // The classification_produced audit row is already durable from + // `policy::evaluate`, so a "wait silently" no-op is the right + // outcome for a non-escalating reply. 
+ if is_post_invitation_summary_delivered + && !matches!(decision, policy::PolicyDecision::Escalate(_)) + { + debug!( + state = %info.state, + ?decision, + "advance_session_round: post-invitation reply did not request human; staying in summary_delivered" + ); + return Ok(()); + } match decision { policy::PolicyDecision::AskClarification { buyer_text, @@ -679,11 +713,14 @@ async fn draft_and_send_self_resolution_invitation( // producing classification's content hash; the `confidence` // and per-party language codes go into the structured // payload so a forensic replay can reconstruct exactly which - // template section each party received. + // template section each party received. `None` for + // rationale_id is allowed (defensive: a session with a + // missing classification_produced row still gets the audit + // row, just without the FK link). db::mediation_events::record_self_resolution_offered( &tx, session_id, - rationale_id.unwrap_or(""), + rationale_id, confidence, buyer_language, seller_language, diff --git a/src/mediation/policy.rs b/src/mediation/policy.rs index 9e25e98..cbed49a 100644 --- a/src/mediation/policy.rs +++ b/src/mediation/policy.rs @@ -305,11 +305,20 @@ pub async fn evaluate( // pre-condition fails the legacy decision passes through // unchanged — preserving byte-for-byte legacy behaviour with the // kill-switch off (SC-007). + // Bundle-availability gate. A daemon that hasn't shipped + // `prompts/phase3-self-resolution.md` yet loads with empty + // templates and `render_for` would otherwise emit the operator + // placeholder to end users. Falling through to the legacy + // Summarize keeps the user-facing surface clean — the cooperative + // branch becomes inert until the file lands. 
+ let templates_present = !prompt_bundle.self_resolution.by_language.is_empty(); + let decision = match base_decision { PolicyDecision::Summarize { classification: ClassificationLabel::CoordinationFailureResolvable, confidence, } if mediation_cfg.self_resolution_enabled + && templates_present && (confidence as f32) >= mediation_cfg.self_resolution_threshold && !prior_offered => { @@ -825,6 +834,18 @@ mod tests { use crate::prompts::PromptBundle; fn test_bundle() -> Arc { + // Populate one language entry so the cooperative-branch + // tests below see a non-empty `by_language` map. Tests that + // exercise the empty-bundle inert behavior construct their + // own bundle inline. + let mut by_language = std::collections::HashMap::new(); + by_language.insert( + "en".to_string(), + crate::mediation::self_resolution::SelfResolutionLanguageEntry { + template: "test invitation".into(), + human_assistance_optin: "test optin".into(), + }, + ); Arc::new(PromptBundle { id: "phase3-default".into(), policy_hash: "test-policy-hash".into(), @@ -833,7 +854,10 @@ mod tests { escalation: "esc".into(), mediation_style: "style".into(), message_templates: "tpl".into(), - self_resolution: crate::mediation::self_resolution::SelfResolutionTemplates::default(), + self_resolution: crate::mediation::self_resolution::SelfResolutionTemplates { + by_language, + fallback_language: "en".into(), + }, }) } @@ -1530,6 +1554,50 @@ mod tests { ); } + #[tokio::test] + async fn evaluate_cooperative_branch_inert_when_templates_empty() { + // Backstop for an upgrade path: the daemon loads with empty + // `self_resolution` templates (file not yet shipped) and the + // operator left `self_resolution_enabled = true` by default. + // Without this gate, the branch would fire and `render_for` + // would emit the placeholder operator-message string to end + // users. The gate falls through to the legacy summarize so + // the user-facing chat stays clean. 
+ let conn = fresh_conn(); + let cfg = enabled_cooperative_cfg(0.75); + // Bundle with empty by_language map. + let empty_bundle = Arc::new(PromptBundle { + id: "phase3-default".into(), + policy_hash: "test-policy-hash".into(), + system: "sys".into(), + classification: "cls".into(), + escalation: "esc".into(), + mediation_style: "style".into(), + message_templates: "tpl".into(), + self_resolution: crate::mediation::self_resolution::SelfResolutionTemplates::default(), + }); + let resp = cooperative_summary_response(0.95); + let decision = evaluate( + &conn, + "sess-policy", + &empty_bundle, + "openai", + "gpt-test", + resp, + 4, + &cfg, + ) + .await + .unwrap(); + assert_eq!( + decision, + PolicyDecision::Summarize { + classification: ClassificationLabel::CoordinationFailureResolvable, + confidence: 0.95, + }, + ); + } + #[tokio::test] async fn evaluate_no_lock_in_after_invitation_for_non_cooperative_label() { // US3: a non-cooperative classification on a round following diff --git a/src/prompts/hash.rs b/src/prompts/hash.rs index a43552e..c2c7ffc 100644 --- a/src/prompts/hash.rs +++ b/src/prompts/hash.rs @@ -118,4 +118,33 @@ mod tests { let b = policy_hash("abc", "d", "", "", ""); assert_ne!(a, b); } + + #[test] + fn policy_hash_v2_identical_inputs_produce_identical_hashes() { + let a = policy_hash_v2("s", "c", "e", "m", "t", "r"); + let b = policy_hash_v2("s", "c", "e", "m", "t", "r"); + assert_eq!(a, b); + assert_eq!(a.len(), 64); + assert!(a.chars().all(|c| c.is_ascii_hexdigit())); + } + + #[test] + fn policy_hash_v2_self_resolution_change_flips_hash() { + let base = policy_hash_v2("s", "c", "e", "m", "t", "r"); + assert_ne!(base, policy_hash_v2("s", "c", "e", "m", "t", "R")); + assert_ne!(base, policy_hash_v2("s", "c", "e", "m", "t", "")); + } + + #[test] + fn policy_hash_v2_differs_from_v1_even_with_empty_self_resolution() { + // Pin the domain-separation guarantee: even when the v2 hash + // is computed with an empty self-resolution segment, it must + // NOT 
collide with the v1 hash on the same first five + // arguments. The labelled segment for `self_resolution` + // (length 0) still feeds bytes into the hasher, so the two + // outputs diverge. + let v1 = policy_hash("s", "c", "e", "m", "t"); + let v2 = policy_hash_v2("s", "c", "e", "m", "t", ""); + assert_ne!(v1, v2); + } } diff --git a/src/prompts/mod.rs b/src/prompts/mod.rs index fc770cf..d7364d9 100644 --- a/src/prompts/mod.rs +++ b/src/prompts/mod.rs @@ -77,23 +77,53 @@ pub fn load_bundle(config: &PromptsConfig) -> Result { ))); } }, - Err(_) => { + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + // Legacy / pre-feature deployment. Inert the cooperative + // branch (the policy gate further down also checks + // `by_language.is_empty()`); legacy summary path runs + // unchanged. Only NotFound triggers this fallback — + // permission errors, EIO, etc. are real configuration + // problems and must surface loudly. tracing::warn!( path = %self_resolution_path, "phase3-self-resolution.md not found; cooperative-self-resolution branch will be inert until the file is added" ); (SelfResolutionTemplates::default(), String::new()) } + Err(e) => { + return Err(Error::PromptBundleLoad(format!( + "failed to read self-resolution templates at {self_resolution_path}: {e}" + ))); + } }; - let policy_hash = hash::policy_hash_v2( - &system, - &classification, - &escalation, - &mediation_style, - &message_templates, - &self_resolution_raw, - ); + // Backwards-compat hash policy. When the cooperative-self-resolution + // bundle file is absent (legacy deployment that hasn't shipped + // `phase3-self-resolution.md` yet), we preserve the v1 hash so a + // restart does NOT rotate the policy hash for live sessions — + // pinned hashes on `mediation_sessions.policy_hash` keep matching + // and the `startup_resume_pass` mismatch path stays inert. 
+ // Once the file is present (post-feature deployment), the v2 hash + // extends over the additional bytes so SC-103 forensic replay + // pins the cooperative bundle byte-for-byte too. + let policy_hash = if self_resolution_raw.is_empty() { + hash::policy_hash( + &system, + &classification, + &escalation, + &mediation_style, + &message_templates, + ) + } else { + hash::policy_hash_v2( + &system, + &classification, + &escalation, + &mediation_style, + &message_templates, + &self_resolution_raw, + ) + }; Ok(PromptBundle { id: "phase3-default".to_string(), diff --git a/src/prompts/self_resolution_parser.rs b/src/prompts/self_resolution_parser.rs index cea60b4..e5f6a30 100644 --- a/src/prompts/self_resolution_parser.rs +++ b/src/prompts/self_resolution_parser.rs @@ -76,6 +76,18 @@ pub fn parse(raw: &str) -> Result { "[{normalized}] human_assistance_optin must not be empty" )); } + // Duplicate-key guard. TOML rejects exact-string duplicates, + // but two sections that only differ in case (`[en]` and + // `[EN]`) collide after our normalization step. Loud failure + // beats a silent overwrite — a translator who copies a + // language section and forgets to relabel it should fail to + // ship rather than have one of the two bodies disappear at + // load time. + if by_language.contains_key(&normalized) { + return Err(format!( + "duplicate language section after normalization: `{normalized}`" + )); + } by_language.insert( normalized, SelfResolutionLanguageEntry { @@ -226,6 +238,30 @@ human_assistance_optin = "ok" assert!(err.contains("TOML")); } + #[test] + fn rejects_duplicate_language_after_normalization() { + // `[en]` and `[EN]` are two distinct TOML sections, but + // collapse to the same key after `to_ascii_lowercase`. The + // parser must error rather than silently keep whichever + // happened to land in the HashMap last. 
+ let raw = r#" +fallback_language = "en" + +[en] +template = "first" +human_assistance_optin = "first-optin" + +[EN] +template = "second" +human_assistance_optin = "second-optin" +"#; + let err = parse(raw).unwrap_err(); + assert!( + err.contains("duplicate"), + "expected duplicate-key error: {err}" + ); + } + #[test] fn normalizes_language_keys_to_lowercase() { let raw = r#" From e4e0b3b16edad9f10aac5122354dbeafd608714c Mon Sep 17 00:00:00 2001 From: grunch Date: Tue, 28 Apr 2026 07:19:48 -0300 Subject: [PATCH 4/9] review: address PR #48 second-round review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verified each finding against the current code; fixed real bugs and spec-vs-code mismatches, skipped one non-issue with justification. Real bugs fixed: - db::mediation::latest_open_session_for: mirrored the Feature 005 carve-out from `list_live_sessions`. A `summary_delivered` session that received the cooperative invitation now also surfaces through the open-session lookup, so a re-take or a re-mediation attempt observes the same session-state semantics consistently across both queries. Legacy `summary_delivered` sessions stay terminal (no `self_resolution_offered` row). - mediation::follow_up: when the post-invitation early-return arm fires (re-classified summary_delivered session whose decision is not `Escalate`), advance `round_count_last_evaluated` before returning. Without the advance the FR-127 idempotency gate would re-classify the same fresh inbound on every subsequent tick, burning reasoning-provider budget on a session that's already settled into "wait silently for a possible human-assistance request". - mediation::policy: switch the cooperative threshold comparison to `confidence >= f64::from(self_resolution_threshold)` so the classifier's f64 confidence is no longer cast down to f32 before comparison. Threshold storage stays f32 per the config contract; the comparison is now precision-honest. 
Spec-vs-code mismatches fixed (audit-row payload): - db::mediation_events::record_self_resolution_offered: payload shape now matches `contracts/audit-events.md`: * `classification_confidence` (renamed from `confidence`) * `rationale_id` lives inside `payload_json` (the dedicated `mediation_events.rationale_id` column stays NULL on this kind — the column convention is reserved for rationales whose lifecycle is owned by the audit row itself; here the rationale is owned by the round-N classification call). * `languages` carries only `buyer` and `seller` codes (dropped `fallback`, which was code-internal noise). * Signature changed from `&Connection` to `&Transaction<'_>` so the audit row can only be written inside an outer transaction — same shape and discipline as `record_escalation_dispatched`. The caller in `follow_up.rs` already passes `&tx`, so this enforcement is purely additive. Documentation fixes (verified against shipped code): - config.sample.toml: clarified the `self_resolution_enabled = false` description. With the kill-switch off, Serbero falls back to the legacy cooperative-summary path (LLM still summarizes for the solver); it does NOT force every cooperative case through human review as the previous comment overstated. - contracts/classifier-output.md: clarified the serde-defaults story. `ClassificationResponse` itself has no serde derives — it is the adapter-facing public shape, not a wire type. The actual wire-format deserialization lives in each adapter's intermediate struct (`ClassificationJson` in src/reasoning/openai.rs), which carries the `#[serde(default)]` annotations on `human_requested` / `buyer_language` / `seller_language`. Adapters are responsible for translating the wire struct into a `ClassificationResponse`. - contracts/template-bundle.md: corrected the "ASCII-folded" claim. The keyword-audit comparison uses `str::to_ascii_lowercase` on both sides, which folds ASCII case but preserves diacritics (`ñ`, `ç`, `á`). 
The banned list enumerates diacritic-bearing forms verbatim — adding Unicode normalization would pull a new dependency for negligible coverage gain. The representative banned-list table in the spec now matches the canonical `BANNED` matrix in the test file. - contracts/audit-events.md: relaxed the "Steps 3 and 4 land in the same transaction" claim to accurately describe the transactional-outbox behavior. Step 3 (`self_resolution_offered`) + the two outbound `mediation_messages` rows commit atomically; step 4 (`summary_generated`) lands in a subsequent transaction because the summarizer makes an LLM HTTP call between the two. A crash between TX1 and TX2 leaves the audit row + outbound rows in place, and the next tick's idempotency check sees the `self_resolution_offered` row and falls through to the legacy Summarize path to recover the still-pending summary. Skipped (verified non-issue): - The TOCTOU race re-flagged on the `prior_offered` predicate in `policy::evaluate` vs. the later write in `follow_up::dispatch`. In single-process operation the engine holds the `Arc<AsyncMutex<Connection>>` across the predicate read and the eventual write inside `draft_and_send_self_resolution_invitation`'s transaction, so two concurrent `advance_session_round` calls cannot both pass the predicate. Adding a UNIQUE partial index would require a new SQL migration which `plan.md` forbids; an HA / multi-process deploy is out of scope for this feature. 
--- config.sample.toml | 13 ++++--- .../contracts/audit-events.md | 17 +++++++-- .../contracts/classifier-output.md | 33 +++++++++++++---- .../contracts/template-bundle.md | 28 +++++++++----- src/db/mediation.rs | 33 +++++++++++++---- src/db/mediation_events.rs | 37 +++++++++++++------ src/mediation/follow_up.rs | 30 ++++++++++----- src/mediation/policy.rs | 6 ++- 8 files changed, 141 insertions(+), 56 deletions(-) diff --git a/config.sample.toml b/config.sample.toml index 990c8e7..24c1747 100644 --- a/config.sample.toml +++ b/config.sample.toml @@ -89,11 +89,14 @@ solver_auth_retry_max_attempts = 24 # kill-switch below). self_resolution_threshold = 0.75 -# Master kill-switch. When false, the branch is bypassed entirely -# and Serbero behaves byte-for-byte as before this feature shipped -# (the legacy cooperative-summary path runs unchanged). Use during -# incident windows or audit reviews when you want to force every -# cooperative case through human review. +# Master kill-switch. When `self_resolution_enabled = false`, the +# cooperative branch is bypassed entirely and Serbero behaves +# byte-for-byte as before this feature shipped: a high-confidence +# `coordination_failure_resolvable` round falls through to the +# legacy cooperative-summary path (the LLM still summarizes for the +# solver; the parties just don't receive the templated invitation). +# Use during incident windows or audit reviews when you want the +# legacy summarize-only flow without any party-facing nudge. self_resolution_enabled = true # --------------------------------------------------------------------------- diff --git a/specs/005-cooperative-self-resolution/contracts/audit-events.md b/specs/005-cooperative-self-resolution/contracts/audit-events.md index 722f3b9..1fda7b0 100644 --- a/specs/005-cooperative-self-resolution/contracts/audit-events.md +++ b/specs/005-cooperative-self-resolution/contracts/audit-events.md @@ -86,9 +86,20 @@ audit-row sequence MUST be: 5. 
session_closed (existing — emitted later by dispute_resolved) ``` -Steps 3 and 4 land in the same transaction. Step 5 lands later -when Mostro genuinely resolves the underlying dispute (existing -`dispute_resolved` handler). +Step 3 (`self_resolution_offered`) lands in the **same** +transaction as the two outbound `mediation_messages` rows — the +self-resolution audit row and the per-party gift-wrap drafts +commit atomically so a crash between them is impossible. Step 4 +(`summary_generated`) lands in a **subsequent** transaction owned +by `deliver_summary`, because the summarizer runs an LLM HTTP call +between the two. A crash between TX1 and TX2 leaves the audit row +and outbound rows in place; the next tick's idempotency check sees +the `self_resolution_offered` row, falls through the cooperative +branch's pre-condition (`!prior_offered`), and the legacy +`Summarize` path picks up the still-pending summary delivery. + +Step 5 lands later when Mostro genuinely resolves the underlying +dispute (existing `dispute_resolved` handler). ## Audit-Row Sequence on the Opt-In Path diff --git a/specs/005-cooperative-self-resolution/contracts/classifier-output.md b/specs/005-cooperative-self-resolution/contracts/classifier-output.md index f0a2013..79e1907 100644 --- a/specs/005-cooperative-self-resolution/contracts/classifier-output.md +++ b/specs/005-cooperative-self-resolution/contracts/classifier-output.md @@ -66,28 +66,45 @@ case the parties might have resolved themselves. ## Rust-Side Parsing -`ClassificationResponse` (`src/models/reasoning.rs`) gains: +`ClassificationResponse` (`src/models/reasoning.rs`) gains three +additive plain Rust fields: ```rust pub struct ClassificationResponse { // ... existing fields ... - #[serde(default)] pub human_requested: bool, + pub buyer_language: Option<String>, + pub seller_language: Option<String>, } ``` -`serde(default)` covers two scenarios: +`ClassificationResponse` itself has no `serde` derives — it is the +adapter-facing shape, not a wire type. 
The actual wire-format +deserialization lives in each adapter's intermediate struct +(`ClassificationJson` in `src/reasoning/openai.rs`, mirrored on the +Anthropic adapter via the shared parser); those structs DO carry +`#[serde(default)]` on `human_requested`, `buyer_language`, and +`seller_language`. The default-fallback covers two scenarios: 1. A provider that hasn't yet been updated to emit the field. - The struct deserialises with `human_requested = false`; the - opt-in path silently never fires for that provider until the - provider's prompt + parser are updated. A startup-time health- - check (R-003 in `research.md`) logs a warning when the - provider doesn't echo a probe. + The wire struct deserialises with `human_requested = false` (and + `buyer_language`/`seller_language` as `None`); the opt-in path + silently never fires for that provider until the provider's + prompt + parser are updated. A startup-time health-check (R-003 + in `research.md`) logs a warning when the provider doesn't echo + a probe. 2. Round 0 / round 1 responses where the prompt didn't request the field. Same default; no false escalation. +The adapter is responsible for translating the wire struct into a +`ClassificationResponse` with the explicit fields populated — i.e. +mapping a missing/false `human_requested` to +`human_requested: false`. New adapters that hand-build a +`ClassificationResponse` (e.g. test fixtures) must therefore set +the three fields explicitly; the type system enforces this since +the struct has no `Default` impl. + ## Policy-Side Behaviour `policy::evaluate(...)` adds **one** short-circuit before the diff --git a/specs/005-cooperative-self-resolution/contracts/template-bundle.md b/specs/005-cooperative-self-resolution/contracts/template-bundle.md index e683ce2..4b7f93d 100644 --- a/specs/005-cooperative-self-resolution/contracts/template-bundle.md +++ b/specs/005-cooperative-self-resolution/contracts/template-bundle.md @@ -77,15 +77,25 @@ shipping value: `"en"`. 
The keyword-audit unit test (`tests/phase3_self_resolution_template_audit.rs`) loads the bundle and walks every `(language, entry)` cell, asserting that the rendered string `format!("{} {}", template, -human_assistance_optin)` does **NOT** contain any of the following -substrings (case-insensitive, ASCII-folded for the diacritic -languages): - -| Language tag | Banned substrings | -|--------------|-------------------| -| `en` | `release`, `settle`, `cancel`, `disburse`, `transfer`, `refund`, `payout` | -| `es` | `liberar`, `liberación`, `liquidar`, `cancelar`, `transferir`, `reembolsar`, `desembolsar` | -| `pt` | `liberar`, `libertar`, `liquidar`, `cancelar`, `transferir`, `reembolsar`, `desembolsar` | +human_assistance_optin)` does **NOT** contain any of the language's +banned substrings. + +Comparison normalization: both the rendered string and each banned +substring are passed through `str::to_ascii_lowercase` before the +substring check. ASCII byte case is folded; non-ASCII bytes +(diacritics like `ñ`, `ç`, `á`) are preserved verbatim in both +sides of the comparison. This is intentional — adding Unicode +normalization would pull a new dependency for negligible coverage +gain (the banned list already enumerates the diacritic-bearing +forms, and translators submit copy in NFC the keyboard input +methods produce). New languages MUST follow the same rule: list the +diacritic-bearing forms verbatim. 
+ +| Language tag | Banned substrings (representative; canonical list lives in the test file) | +|--------------|---------------------------------------------------------------------------| +| `en` | `release`, `settle`, `cancel`, `disburse`, `transfer`, `refund`, `payout`, `wire`, `force-close`, `admin-settle`, `admin-cancel` | +| `es` | `liberar`, `liberen`, `cancelar`, `cancelen`, `saldar`, `transferir`, `transferencia`, `reembolsar`, `reembolso`, `pagar`, `paguen`, `envíen el fiat`, `envíen los sats`, `cerrar la disputa` | +| `pt` | `liberar`, `liberem`, `cancelar`, `cancelem`, `saldar`, `transferir`, `transferência`, `reembolsar`, `reembolso`, `pagar`, `paguem`, `enviem o fiat`, `enviem os sats`, `fechar a disputa` | The list is the union of "verbs that name a fund-moving action in the Mostro / P2P-escrow domain" plus their direct cognates. New diff --git a/src/db/mediation.rs b/src/db/mediation.rs index 55b37f4..336aabd 100644 --- a/src/db/mediation.rs +++ b/src/db/mediation.rs @@ -393,6 +393,13 @@ pub fn set_session_state( /// `superseded_by_human`) are excluded — a dispute that was closed /// or escalated earlier must not block a later session open. /// +/// Feature 005 carve-out (mirrors [`list_live_sessions`]): a session +/// in `summary_delivered` that received the cooperative-self-resolution +/// invitation is still considered live so the human-assistance opt-in +/// path can fire on a later party reply. The carve-out is scoped by +/// the `self_resolution_offered` audit row, so legacy +/// `summary_delivered` sessions stay terminal. +/// /// Used by the engine to gate session opens and, crucially, re-checked /// inside the final open-session DB transaction to close the /// check-then-act race. 
@@ -403,15 +410,25 @@ pub fn latest_open_session_for( use std::str::FromStr; match conn.query_row( - "SELECT session_id, state FROM mediation_sessions - WHERE dispute_id = ?1 - AND state NOT IN ( - 'closed', - 'summary_delivered', - 'escalation_recommended', - 'superseded_by_human' + "SELECT s.session_id, s.state FROM mediation_sessions s + WHERE s.dispute_id = ?1 + AND ( + s.state NOT IN ( + 'closed', + 'summary_delivered', + 'escalation_recommended', + 'superseded_by_human' + ) + OR ( + s.state = 'summary_delivered' + AND EXISTS ( + SELECT 1 FROM mediation_events e + WHERE e.session_id = s.session_id + AND e.kind = 'self_resolution_offered' + ) + ) ) - ORDER BY started_at DESC + ORDER BY s.started_at DESC LIMIT 1", params![dispute_id], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)), diff --git a/src/db/mediation_events.rs b/src/db/mediation_events.rs index dc0fd13..1540a5e 100644 --- a/src/db/mediation_events.rs +++ b/src/db/mediation_events.rs @@ -673,39 +673,52 @@ pub fn record_escalation_dispatch_parse_failed( /// self-resolution invitation audit row. /// /// Emitted before the outbound gift-wraps publish so the audit row is -/// durable even if relay publishing fails. Payload carries the -/// per-party language codes the dispatch arm resolved (used for -/// forensic replay per `quickstart.md`); the full rationale text -/// stays in `reasoning_rationales`, referenced by `rationale_id`, -/// per FR-120. +/// durable even if relay publishing fails. 
Payload shape matches +/// `specs/005-cooperative-self-resolution/contracts/audit-events.md`: +/// `classification_confidence` (not `confidence`), +/// `rationale_id` lives **inside** the payload (the dedicated +/// `mediation_events.rationale_id` column stays NULL — that column +/// is reserved for rationales whose lifecycle is owned by the audit +/// row itself, while here the rationale is owned by the round-N +/// classification call), and `languages` carries only `buyer` and +/// `seller` codes (no `fallback`). +/// +/// Takes `&Transaction<'_>` (not `&Connection`) so the audit row can +/// only be written inside an outer transaction — same shape as +/// `record_escalation_dispatched` and matching the FR-001 invariant +/// that the audit row + the two outbound `mediation_messages` rows +/// commit atomically. #[allow(clippy::too_many_arguments)] pub fn record_self_resolution_offered( - conn: &Connection, + tx: &Transaction<'_>, session_id: &str, rationale_id: Option<&str>, - confidence: f64, + classification_confidence: f64, buyer_language: Option<&str>, seller_language: Option<&str>, - fallback_language: &str, prompt_bundle_id: &str, policy_hash: &str, occurred_at: i64, ) -> Result { let payload = json!({ - "confidence": confidence, + "session_id": session_id, + "classification_confidence": classification_confidence, + "rationale_id": rationale_id, "languages": { "buyer": buyer_language, "seller": seller_language, - "fallback": fallback_language, }, }) .to_string(); + // Per the contract, the dedicated `rationale_id` column stays + // NULL on this kind — the rationale-id reference travels in the + // payload only. 
record_event( - conn, + tx, MediationEventKind::SelfResolutionOffered, Some(session_id), &payload, - rationale_id, + None, Some(prompt_bundle_id), Some(policy_hash), occurred_at, diff --git a/src/mediation/follow_up.rs b/src/mediation/follow_up.rs index 73bedad..0fb7139 100644 --- a/src/mediation/follow_up.rs +++ b/src/mediation/follow_up.rs @@ -313,10 +313,22 @@ pub async fn advance_session_round( if is_post_invitation_summary_delivered && !matches!(decision, policy::PolicyDecision::Escalate(_)) { + // Advance the evaluator marker before returning. Otherwise + // FR-127's idempotency gate at the top of the next tick + // would still see `total_fresh_inbounds > round_count_last_evaluated` + // and re-classify the same reply on every cycle — burning + // reasoning-provider budget on a session that's already + // settled into "wait silently for a possible human-assistance + // request". + let mut guard = conn.lock().await; + let tx = guard.transaction()?; + db::mediation::advance_evaluator_marker(&tx, session_id, total_fresh_inbounds)?; + tx.commit()?; debug!( state = %info.state, ?decision, - "advance_session_round: post-invitation reply did not request human; staying in summary_delivered" + round_count_marked = total_fresh_inbounds, + "advance_session_round: post-invitation reply did not request human; staying in summary_delivered (marker advanced)" ); return Ok(()); } @@ -709,14 +721,13 @@ async fn draft_and_send_self_resolution_invitation( persisted_at: now, }, )?; - // Self-resolution audit row. The `rationale_id` is the - // producing classification's content hash; the `confidence` - // and per-party language codes go into the structured - // payload so a forensic replay can reconstruct exactly which - // template section each party received. `None` for - // rationale_id is allowed (defensive: a session with a - // missing classification_produced row still gets the audit - // row, just without the FK link). + // Self-resolution audit row. 
`rationale_id` is the producing + // classification's content hash, embedded inside `payload_json` + // per the contract (the dedicated `mediation_events.rationale_id` + // column stays NULL on this kind). The `classification_confidence` + // and per-party language codes go into the structured payload + // so a forensic replay can reconstruct exactly which template + // section each party received. db::mediation_events::record_self_resolution_offered( &tx, session_id, @@ -724,7 +735,6 @@ async fn draft_and_send_self_resolution_invitation( confidence, buyer_language, seller_language, - &prompt_bundle.self_resolution.fallback_language, &prompt_bundle.id, &prompt_bundle.policy_hash, now, diff --git a/src/mediation/policy.rs b/src/mediation/policy.rs index cbed49a..04813ec 100644 --- a/src/mediation/policy.rs +++ b/src/mediation/policy.rs @@ -319,7 +319,11 @@ pub async fn evaluate( confidence, } if mediation_cfg.self_resolution_enabled && templates_present - && (confidence as f32) >= mediation_cfg.self_resolution_threshold + // Compare in f64 to avoid losing precision on the + // higher-precision classifier confidence — the threshold + // is f32 by config-contract but the comparison stays + // honest. + && confidence >= f64::from(mediation_cfg.self_resolution_threshold) && !prior_offered => { debug!( From bb561ee0d4833d6cc133e024ec3df4d773704e47 Mon Sep 17 00:00:00 2001 From: grunch Date: Tue, 28 Apr 2026 09:15:54 -0300 Subject: [PATCH 5/9] =?UTF-8?q?review:=20third-round=20PR=20#48=20fixes=20?= =?UTF-8?q?=E2=80=94=20record=20effective=20language=20+=20lint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verified each finding against the current code; fixed two real issues, skipped a re-flagged TOCTOU concern with the same justification as before. 
Real fixes: - mediation::self_resolution: add a public `SelfResolutionTemplates::resolve_effective_language(...)` helper that returns the language code `render_for` will actually use (the input when the bundle has a matching section; the `fallback_language` otherwise; `None` when even the fallback is missing). The existing `entry_for(...)` is rewritten to delegate to this resolver so the lookup logic has one source of truth. - mediation::follow_up::draft_and_send_self_resolution_invitation: resolve the effective language for each party BEFORE rendering + recording the audit row, and use those resolved codes in: * the `self_resolution_offered` audit row's `payload_json.languages.{buyer,seller}` fields, and * the `cooperative_case_detected` tracing event (alongside the raw classifier output, so operators can see when the fallback kicked in). Without this fix, the audit recorded the raw classifier output (e.g. `"de"`) while `render_for` had silently fallen back to the bundle's `fallback_language` (e.g. `"en"`). Forensic replay (per `quickstart.md`) re-renders the message body from the audit row's `languages.{buyer,seller}` fields — using the raw value would reproduce a different message than the one the party actually saw. Documentation fix: - contracts/template-bundle.md: added a `text` language tag to the `{template} {human_assistance_optin}` fenced block (markdownlint MD040). Skipped (re-flagged for the third time, same answer as before): - The TOCTOU concern on `policy::evaluate`'s `prior_offered` predicate vs. the later write in `draft_and_send_self_resolution_invitation`. In single-process operation the engine serialises `advance_session_round` calls through `Arc<AsyncMutex<Connection>>` (the predicate read and the eventual write inside `draft_and_send_self_resolution_invitation`'s transaction both run under the same async task), so two concurrent calls cannot both pass the predicate and both write.
Adding a UNIQUE partial index to defend against multi-process / HA deploys would require a new SQL migration which the spec's `plan.md` explicitly forbids; HA is out of scope for this feature. Tests: - 3 new unit tests on `resolve_effective_language` (known code, unknown code falls back, structurally-invalid bundle returns `None`). Total 295 lib tests + integration suite pass; clippy clean. --- .../contracts/template-bundle.md | 2 +- src/mediation/follow_up.rs | 36 ++++++++-- src/mediation/self_resolution.rs | 66 ++++++++++++++++++- 3 files changed, 94 insertions(+), 10 deletions(-) diff --git a/specs/005-cooperative-self-resolution/contracts/template-bundle.md b/specs/005-cooperative-self-resolution/contracts/template-bundle.md index 4b7f93d..d9192f0 100644 --- a/specs/005-cooperative-self-resolution/contracts/template-bundle.md +++ b/specs/005-cooperative-self-resolution/contracts/template-bundle.md @@ -46,7 +46,7 @@ e te redireciono para o solver designado." The full message a party receives is, byte-for-byte: -``` +```text {template} {human_assistance_optin} ``` diff --git a/src/mediation/follow_up.rs b/src/mediation/follow_up.rs index 0fb7139..17e9c96 100644 --- a/src/mediation/follow_up.rs +++ b/src/mediation/follow_up.rs @@ -656,6 +656,22 @@ async fn draft_and_send_self_resolution_invitation( ) -> Result<()> { use crate::models::mediation::TranscriptParty; + // Resolve the EFFECTIVE language each party will actually + // receive (raw classifier code when the bundle has a matching + // section; bundle's `fallback_language` otherwise). The audit + // row below records these resolved codes — not the raw + // classifier output — so a forensic replay can reproduce the + // exact bytes each party saw without having to re-run the + // resolver. 
+ let buyer_effective_language = prompt_bundle + .self_resolution + .resolve_effective_language(buyer_language) + .map(|s| s.to_string()); + let seller_effective_language = prompt_bundle + .self_resolution + .resolve_effective_language(seller_language) + .map(|s| s.to_string()); + let buyer_msg = self_resolution::render_for(buyer_language, &prompt_bundle.self_resolution); let seller_msg = self_resolution::render_for(seller_language, &prompt_bundle.self_resolution); @@ -725,16 +741,18 @@ async fn draft_and_send_self_resolution_invitation( // classification's content hash, embedded inside `payload_json` // per the contract (the dedicated `mediation_events.rationale_id` // column stays NULL on this kind). The `classification_confidence` - // and per-party language codes go into the structured payload - // so a forensic replay can reconstruct exactly which template - // section each party received. + // and the EFFECTIVE per-party language codes go into the + // structured payload — i.e. the codes after fallback + // resolution — so a forensic replay can reconstruct exactly + // which template section each party received without having + // to re-run the resolver. db::mediation_events::record_self_resolution_offered( &tx, session_id, rationale_id, confidence, - buyer_language, - seller_language, + buyer_effective_language.as_deref(), + seller_effective_language.as_deref(), &prompt_bundle.id, &prompt_bundle.policy_hash, now, @@ -742,7 +760,11 @@ async fn draft_and_send_self_resolution_invitation( tx.commit()?; } - // Operational tracing for SC-001 baseline (T029). + // Operational tracing for SC-001 baseline (T029). We log both + // the raw classifier output AND the effective resolved code so + // operators can see at a glance when the bundle's fallback + // kicked in (e.g. classifier says `de`, bundle has only en/es/pt + // → effective resolves to `en`). 
let bid_for_log = prompt_bundle.id.clone(); info!( event = "cooperative_case_detected", @@ -751,6 +773,8 @@ async fn draft_and_send_self_resolution_invitation( prompt_bundle_id = %bid_for_log, buyer_language = buyer_language.unwrap_or("(none)"), seller_language = seller_language.unwrap_or("(none)"), + buyer_effective_language = buyer_effective_language.as_deref().unwrap_or("(none)"), + seller_effective_language = seller_effective_language.as_deref().unwrap_or("(none)"), occurred_at_unix = now, "cooperative_case_detected" ); diff --git a/src/mediation/self_resolution.rs b/src/mediation/self_resolution.rs index bc7f39a..d9cf4b5 100644 --- a/src/mediation/self_resolution.rs +++ b/src/mediation/self_resolution.rs @@ -72,13 +72,46 @@ impl SelfResolutionTemplates { /// structurally invalid (no entry for the fallback either), /// which the loader rejects at startup. pub fn entry_for(&self, language_code: Option<&str>) -> Option<&SelfResolutionLanguageEntry> { + self.resolve_effective_language(language_code) + .and_then(|code| self.by_language.get(code)) + } + + /// Return the language code that [`render_for`] will actually + /// render for the given input. That's the input code + /// (lowercased + trimmed) when the bundle has a matching + /// section, or the configured `fallback_language` otherwise. + /// Returns `None` when the bundle has neither the requested + /// code nor the fallback (structurally-invalid bundle, rejected + /// by the loader). + /// + /// Callers that need to AUDIT the language a party actually + /// received MUST use this resolver — recording the raw + /// classifier output instead would mis-record sessions where + /// the model emitted a code the bundle doesn't carry (e.g. the + /// classifier returns `"de"` and the bundle falls back to + /// `"en"`; forensic replay needs `"en"` to reproduce the bytes + /// the party saw). 
+ pub fn resolve_effective_language<'a>( + &'a self, + language_code: Option<&str>, + ) -> Option<&'a str> { if let Some(code) = language_code { let normalized = code.trim().to_ascii_lowercase(); - if let Some(entry) = self.by_language.get(&normalized) { - return Some(entry); + // Compare against the keys via lookup; the keys are + // already normalised by the parser. + if self.by_language.contains_key(&normalized) { + // Borrow the key out of the map so the returned + // `&str` ties to the bundle's lifetime. + if let Some((stored_key, _)) = self.by_language.get_key_value(&normalized) { + return Some(stored_key.as_str()); + } } } - self.by_language.get(&self.fallback_language) + if self.by_language.contains_key(&self.fallback_language) { + Some(self.fallback_language.as_str()) + } else { + None + } } } @@ -191,4 +224,31 @@ mod tests { .expect("fallback entry must exist"); assert!(entry.template.starts_with("Thanks")); } + + #[test] + fn resolve_effective_language_returns_match_when_present() { + let bundle = fixture_bundle(); + assert_eq!(bundle.resolve_effective_language(Some("es")), Some("es")); + // Case + whitespace normalised same as `entry_for`. + assert_eq!(bundle.resolve_effective_language(Some(" ES ")), Some("es")); + } + + #[test] + fn resolve_effective_language_returns_fallback_when_unknown() { + let bundle = fixture_bundle(); + // Unknown code → fallback (`"en"` per `fixture_bundle`). + assert_eq!(bundle.resolve_effective_language(Some("de")), Some("en")); + // None → fallback. 
+ assert_eq!(bundle.resolve_effective_language(None), Some("en")); + } + + #[test] + fn resolve_effective_language_none_when_bundle_lacks_fallback() { + let bundle = SelfResolutionTemplates { + by_language: HashMap::new(), + fallback_language: "en".into(), + }; + assert_eq!(bundle.resolve_effective_language(Some("en")), None); + assert_eq!(bundle.resolve_effective_language(None), None); + } } From 5a31e0322a28eb90d5c034f1b66b8db058d70101 Mon Sep 17 00:00:00 2001 From: grunch Date: Tue, 28 Apr 2026 09:21:28 -0300 Subject: [PATCH 6/9] review: document the cooperative-branch TOCTOU non-issue inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This finding has been re-flagged four times now. The predicate read + write window is not racy in single-process operation, and the structural invariant that makes it safe lives several call sites away from the predicate itself — making it easy to miss on review. Add an inline comment at the predicate site explaining: 1. The mediation engine spawns exactly one tokio task that runs ticks sequentially. 2. `run_ingest_tick` processes sessions serially via `while let Some(res) = fetchers.join_next().await { ... advance_session_round(...).await }`. 3. A per-session UNIQUE partial index would be the belt-and-braces defence for HA / multi-process, but HA is out of scope per `plan.md` and that file also forbids new SQL migrations as a feature goal. The reviewer's referenced "escalation_dispatches v5 pattern" is materially different: v5's UNIQUE INDEX is on the `escalation_dispatches` side-table (the dispatcher's own bookkeeping), not on the `mediation_events` audit log. The matching audit row (`escalation_dispatched`) has no UNIQUE constraint of its own. There is no analogous side-table for self-resolution to apply that pattern to. 
A blanket `UNIQUE(session_id, kind)` on `mediation_events` would break other kinds that legitimately occur multiple times (`classification_produced` per round, `inbound_ingested` per inbound, `state_transition` per transition). A partial UNIQUE index scoped to `kind = 'self_resolution_offered'` would work but needs a migration `plan.md` forbids. No code change beyond the comment. Tests still pass (295 lib + integration suite); clippy clean. --- src/mediation/policy.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/mediation/policy.rs b/src/mediation/policy.rs index 04813ec..a20a2a3 100644 --- a/src/mediation/policy.rs +++ b/src/mediation/policy.rs @@ -269,6 +269,24 @@ pub async fn evaluate( .await?; // Predicate guard for both Feature 005 branches. + // + // No TOCTOU window here in single-process operation, even + // though the predicate read and the eventual write in + // `follow_up::draft_and_send_self_resolution_invitation` happen + // in two different `conn.lock().await` regions: the mediation + // engine spawns exactly one tokio task that runs ticks + // sequentially in a loop, `run_ingest_tick` processes sessions + // serially via `while let Some(res) = fetchers.join_next().await + // { ... advance_session_round(...).await }`, and a single call + // to `advance_session_round` always completes (predicate + + // dispatch write) before the next call to it for any session + // can begin. A per-session UNIQUE partial index would be the + // belt-and-braces defence for an HA / multi-process deploy, but + // (a) HA is out of scope per `plan.md`, and (b) such an index + // would require a new SQL migration which `plan.md` also + // forbids as a feature goal — see + // `specs/005-cooperative-self-resolution/plan.md` §"strictly + // additive: no DB migration". let prior_offered = { let guard = conn.lock().await; db::mediation_events::session_has_self_resolution_offered(&guard, session_id)? 
From 3215c6967dd629473ce700c5fef8cdb707f69243 Mon Sep 17 00:00:00 2001 From: grunch Date: Tue, 28 Apr 2026 09:30:18 -0300 Subject: [PATCH 7/9] review: add in-TX duplicate guard at the self-resolution write site MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even though single-process operation is safe (sequential engine task → sequential per-session loop → AsyncMutex on Connection), the previous fix lived only in a comment at the predicate site. This commit moves the defence to the actual write site so the guard is co-located with the critical section and survives any future architectural change. Implementation: - `draft_and_send_self_resolution_invitation` now returns `Result<bool>` instead of `Result<()>`: * `Ok(true)` — invitation committed and published. * `Ok(false)` — duplicate detected at write time; transaction was rolled back, no gift-wraps published. * `Err(_)` — genuine failure (failure-counter path applies). - The transaction now re-checks `db::mediation_events::session_has_self_resolution_offered(&tx, session_id)` BEFORE writing. If a row exists (another path won the race), the helper drops the transaction without committing, rolling back the two `mediation_messages` rows that would have been inserted, and returns `Ok(false)`. - The dispatch arm in `advance_session_round` interprets the return value: on `Ok(false)` it skips the pre-flip + `deliver_summary` (the other path already handles that side of the dispatch) and just advances `round_count_last_evaluated` so the same fresh inbound isn't re-classified next tick. This is option 3 from the reviewer's suggestions ("serialize per-session writes with a lock to ensure 'fire at most once per session'"). The serialization comes from the existing `Arc<AsyncMutex<Connection>>` plus the in-TX re-check, not from a UNIQUE constraint (which would require a SQL migration that `plan.md` forbids as a feature goal).
The `escalation_dispatches` v5 pattern referenced in earlier reviews protects a separate side-table — not the audit log; there is no analogous side-table for self-resolution to apply that pattern to. Tests: - All 295 lib + integration tests still pass; clippy clean. --- src/mediation/follow_up.rs | 204 ++++++++++++++++++++++++------------- 1 file changed, 136 insertions(+), 68 deletions(-) diff --git a/src/mediation/follow_up.rs b/src/mediation/follow_up.rs index 17e9c96..5045723 100644 --- a/src/mediation/follow_up.rs +++ b/src/mediation/follow_up.rs @@ -480,7 +480,7 @@ pub async fn advance_session_round( let guard = conn.lock().await; latest_classification_rationale_id(&guard, session_id)? }; - if let Err(e) = draft_and_send_self_resolution_invitation( + let dispatch_outcome = draft_and_send_self_resolution_invitation( conn, client, serbero_keys, @@ -493,21 +493,47 @@ pub async fn advance_session_round( prompt_bundle, rationale_id.as_deref(), ) - .await - { - warn!( - error = %e, - "advance_session_round: self-resolution invitation drafter failed" + .await; + let invitation_committed = match dispatch_outcome { + Ok(committed) => committed, + Err(e) => { + warn!( + error = %e, + "advance_session_round: self-resolution invitation drafter failed" + ); + handle_reasoning_failure( + conn, + client, + session_id, + &info.dispute_id, + solvers, + prompt_bundle, + ) + .await; + return Ok(()); + } + }; + if !invitation_committed { + // Defensive duplicate-detection path. The in-TX + // re-check inside `draft_and_send_self_resolution_invitation` + // saw a prior `self_resolution_offered` row for this + // session — another path won the race and has + // already (or will shortly) drive the + // pre-flip + `deliver_summary`. Skip those steps + // here so we don't double-summarize. Advance the + // evaluator marker in a short transaction so this + // tick doesn't keep re-classifying the same fresh + // inbound forever. 
+ let new_marker = total_fresh_inbounds; + let mut guard = conn.lock().await; + let tx = guard.transaction()?; + db::mediation::advance_evaluator_marker(&tx, session_id, new_marker)?; + tx.commit()?; + info!( + confidence, + round_count_marked = new_marker, + "advance_session_round: SuggestSelfResolutionWithSummary skipped (duplicate race)" ); - handle_reasoning_failure( - conn, - client, - session_id, - &info.dispute_id, - solvers, - prompt_bundle, - ) - .await; return Ok(()); } // Pre-flip awaiting_response → classified so @@ -640,6 +666,17 @@ fn latest_classification_rationale_id( /// gift-wraps OUTSIDE the transaction. A relay-side publish failure /// after commit leaves the rows in place as historical record — /// matches the existing drafter discipline (FR-126 Non-Goals). +/// +/// Returns: +/// - `Ok(true)` — invitation committed and published. +/// - `Ok(false)` — duplicate detected at write time; in-TX +/// re-check found a `self_resolution_offered` row for the +/// session (another path won the race), the transaction was +/// rolled back without writing, no gift-wraps were published. +/// The caller MUST skip subsequent dispatch steps +/// (state pre-flip, `deliver_summary`) since the other path +/// already drove them. +/// - `Err(_)` — genuine failure; failure-counter path applies. 
#[allow(clippy::too_many_arguments)] async fn draft_and_send_self_resolution_invitation( conn: &Arc>, @@ -653,7 +690,7 @@ async fn draft_and_send_self_resolution_invitation( seller_shared_keys: &Keys, prompt_bundle: &Arc, rationale_id: Option<&str>, -) -> Result<()> { +) -> Result { use crate::models::mediation::TranscriptParty; // Resolve the EFFECTIVE language each party will actually @@ -704,60 +741,91 @@ async fn draft_and_send_self_resolution_invitation( let seller_inner_id_hex = seller_wrap.inner_event_id.to_hex(); let now = super::current_ts_secs()?; - { + let committed = { let mut guard = conn.lock().await; let tx = guard.transaction()?; - db::mediation::insert_outbound_message( - &tx, - &db::mediation::NewOutboundMessage { - session_id, - party: TranscriptParty::Buyer, - shared_pubkey: &buyer_shared_pubkey_hex, - inner_event_id: &buyer_inner_id_hex, - inner_event_created_at: buyer_wrap.inner_created_at, - outer_event_id: Some(&buyer_wrap.outer.id.to_hex()), - content: &buyer_msg, - prompt_bundle_id: &prompt_bundle.id, - policy_hash: &prompt_bundle.policy_hash, - persisted_at: now, - }, - )?; - db::mediation::insert_outbound_message( - &tx, - &db::mediation::NewOutboundMessage { + + // Defensive in-TX re-check (belt-and-braces against the + // TOCTOU window that exists on paper between the predicate + // read in `policy::evaluate` and this write site). The + // single-process engine architecture already serialises + // these calls per session via the global `AsyncMutex` on + // `Connection` plus the sequential per-session loop in + // `run_ingest_tick`, but having the guard at the actual + // write site means the invariant is visible AT the + // critical section and the dispatch is robust to any + // future architectural change. If the predicate is true + // here, another path already wrote the row — drop the tx + // (rolls back the two outbound rows we'd have inserted) + // and let the caller skip the publish + summary steps. 
+ if db::mediation_events::session_has_self_resolution_offered(&tx, session_id)? { + warn!( + session_id = %session_id, + "draft_and_send_self_resolution_invitation: prior `self_resolution_offered` \ + row detected at write time; rolling back this dispatch's transaction and \ + skipping outbound publishes" + ); + // `tx` drops without commit → rollback. Explicit drop + // makes the rollback visible to the reader. + drop(tx); + false + } else { + db::mediation::insert_outbound_message( + &tx, + &db::mediation::NewOutboundMessage { + session_id, + party: TranscriptParty::Buyer, + shared_pubkey: &buyer_shared_pubkey_hex, + inner_event_id: &buyer_inner_id_hex, + inner_event_created_at: buyer_wrap.inner_created_at, + outer_event_id: Some(&buyer_wrap.outer.id.to_hex()), + content: &buyer_msg, + prompt_bundle_id: &prompt_bundle.id, + policy_hash: &prompt_bundle.policy_hash, + persisted_at: now, + }, + )?; + db::mediation::insert_outbound_message( + &tx, + &db::mediation::NewOutboundMessage { + session_id, + party: TranscriptParty::Seller, + shared_pubkey: &seller_shared_pubkey_hex, + inner_event_id: &seller_inner_id_hex, + inner_event_created_at: seller_wrap.inner_created_at, + outer_event_id: Some(&seller_wrap.outer.id.to_hex()), + content: &seller_msg, + prompt_bundle_id: &prompt_bundle.id, + policy_hash: &prompt_bundle.policy_hash, + persisted_at: now, + }, + )?; + // Self-resolution audit row. `rationale_id` is the producing + // classification's content hash, embedded inside `payload_json` + // per the contract (the dedicated `mediation_events.rationale_id` + // column stays NULL on this kind). The `classification_confidence` + // and the EFFECTIVE per-party language codes go into the + // structured payload — i.e. the codes after fallback + // resolution — so a forensic replay can reconstruct exactly + // which template section each party received without having + // to re-run the resolver. 
+ db::mediation_events::record_self_resolution_offered( + &tx, session_id, - party: TranscriptParty::Seller, - shared_pubkey: &seller_shared_pubkey_hex, - inner_event_id: &seller_inner_id_hex, - inner_event_created_at: seller_wrap.inner_created_at, - outer_event_id: Some(&seller_wrap.outer.id.to_hex()), - content: &seller_msg, - prompt_bundle_id: &prompt_bundle.id, - policy_hash: &prompt_bundle.policy_hash, - persisted_at: now, - }, - )?; - // Self-resolution audit row. `rationale_id` is the producing - // classification's content hash, embedded inside `payload_json` - // per the contract (the dedicated `mediation_events.rationale_id` - // column stays NULL on this kind). The `classification_confidence` - // and the EFFECTIVE per-party language codes go into the - // structured payload — i.e. the codes after fallback - // resolution — so a forensic replay can reconstruct exactly - // which template section each party received without having - // to re-run the resolver. - db::mediation_events::record_self_resolution_offered( - &tx, - session_id, - rationale_id, - confidence, - buyer_effective_language.as_deref(), - seller_effective_language.as_deref(), - &prompt_bundle.id, - &prompt_bundle.policy_hash, - now, - )?; - tx.commit()?; + rationale_id, + confidence, + buyer_effective_language.as_deref(), + seller_effective_language.as_deref(), + &prompt_bundle.id, + &prompt_bundle.policy_hash, + now, + )?; + tx.commit()?; + true + } + }; + if !committed { + return Ok(false); } // Operational tracing for SC-001 baseline (T029). 
We log both @@ -798,7 +866,7 @@ async fn draft_and_send_self_resolution_invitation( ) .await?; - Ok(()) + Ok(true) } /// One read of everything `advance_session_round` needs from the From 5110ee605b845ced05b91526a9a61abf96368829 Mon Sep 17 00:00:00 2001 From: grunch Date: Tue, 28 Apr 2026 09:35:19 -0300 Subject: [PATCH 8/9] review: render_for returns Option instead of placeholder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verified the finding against current code. The placeholder string was unreachable in normal flow (the `templates_present` gate in `policy::evaluate` falls the cooperative branch through to legacy Summarize whenever `by_language.is_empty()`, and the parser rejects bundles whose `fallback_language` has no matching section at load time), but emitting a diagnostic operator-message into a party's chat on any future bypass would be a worse failure mode than skipping the branch. Tightened the API: - `mediation::self_resolution::render_for` now returns `Option<String>`. It returns `None` when `entry_for(...)` is `None` — i.e. when the bundle has neither the requested language nor the configured fallback. The diagnostic-placeholder string is gone. - `mediation::follow_up::draft_and_send_self_resolution_invitation` matches on the new return shape: when either party's render returns `None`, it logs a `warn!` and returns `Ok(false)` (the duplicate-skip outcome from the previous commit), so the dispatch caller naturally skips the publishes + state walk + the cooperative-summary delivery and just advances the evaluator marker. The party-facing chat never sees the placeholder. - Updated unit tests in `self_resolution.rs` (`.expect(...)` / assert `is_none()`) and the keyword-audit test in `tests/phase3_self_resolution_template_audit.rs` to handle the new return type. All 295 lib tests + integration suite still pass; clippy clean.
--- src/mediation/follow_up.rs | 38 +++++++++++- src/mediation/self_resolution.rs | 58 ++++++++++--------- .../phase3_self_resolution_template_audit.rs | 7 ++- 3 files changed, 73 insertions(+), 30 deletions(-) diff --git a/src/mediation/follow_up.rs b/src/mediation/follow_up.rs index 5045723..1011bac 100644 --- a/src/mediation/follow_up.rs +++ b/src/mediation/follow_up.rs @@ -709,8 +709,42 @@ async fn draft_and_send_self_resolution_invitation( .resolve_effective_language(seller_language) .map(|s| s.to_string()); - let buyer_msg = self_resolution::render_for(buyer_language, &prompt_bundle.self_resolution); - let seller_msg = self_resolution::render_for(seller_language, &prompt_bundle.self_resolution); + let buyer_msg = match self_resolution::render_for( + buyer_language, + &prompt_bundle.self_resolution, + ) { + Some(s) => s, + None => { + // Structurally invalid bundle (no requested-language + // entry AND no fallback entry). The parser rejects this + // at load time and `policy::evaluate` gates on + // `templates_present`, so this is unreachable in normal + // flow; we return `Ok(false)` rather than panic so the + // dispatch caller skips the publishes + state walk + // cleanly. Skipping is safer than emitting a diagnostic + // operator-message into a party's chat. 
+ warn!( + session_id = %session_id, + "draft_and_send_self_resolution_invitation: bundle is missing fallback-language section; \ + skipping cooperative invitation" + ); + return Ok(false); + } + }; + let seller_msg = match self_resolution::render_for( + seller_language, + &prompt_bundle.self_resolution, + ) { + Some(s) => s, + None => { + warn!( + session_id = %session_id, + "draft_and_send_self_resolution_invitation: bundle is missing fallback-language section; \ + skipping cooperative invitation" + ); + return Ok(false); + } + }; let buyer_wrap = outbound::build_wrap_with_audience( serbero_keys, diff --git a/src/mediation/self_resolution.rs b/src/mediation/self_resolution.rs index d9cf4b5..e382eda 100644 --- a/src/mediation/self_resolution.rs +++ b/src/mediation/self_resolution.rs @@ -125,27 +125,29 @@ impl SelfResolutionTemplates { /// `[en]/[es]/[pt]` bundle) falls back rather than producing an /// empty message. /// -/// Output shape: `format!("{template} {optin}")`. The single space -/// separator is enough — both halves end with their own -/// punctuation. Forensic replay (per `quickstart.md`) reproduces -/// the same string by re-running this function on the bundle bytes -/// pinned by `mediation_events.policy_hash` for the +/// Returns `Some(rendered)` when the bundle has either the +/// requested code or the configured fallback. Returns `None` +/// **only** when the bundle is structurally invalid (no entry for +/// the fallback either) — the parser rejects this case at load +/// time, but the function returns `None` instead of a diagnostic +/// placeholder so the dispatch caller can detect the impossible +/// state and skip the cooperative branch rather than emitting an +/// operator-facing message in the user's chat. +/// +/// Output shape on the `Some` branch: +/// `format!("{template} {optin}")`. The single space separator is +/// enough — both halves end with their own punctuation. 
Forensic +/// replay (per `quickstart.md`) reproduces the same string by +/// re-running this function on the bundle bytes pinned by +/// `mediation_events.policy_hash` for the /// `self_resolution_offered` row. -pub fn render_for(language_code: Option<&str>, templates: &SelfResolutionTemplates) -> String { - match templates.entry_for(language_code) { - Some(entry) => format!("{} {}", entry.template, entry.human_assistance_optin), - None => { - // Structurally invalid bundle — should have been caught at - // load time. Render a deliberately ugly placeholder rather - // than panicking so the engine tick keeps running; the - // operator sees the breakage in the relayed message body - // and the audit row payload. - String::from( - "[serbero: self-resolution template bundle is missing the configured fallback language; \ - please ask the operator to verify prompts/phase3-self-resolution.md]", - ) - } - } +pub fn render_for( + language_code: Option<&str>, + templates: &SelfResolutionTemplates, +) -> Option<String> { + templates + .entry_for(language_code) + .map(|entry| format!("{} {}", entry.template, entry.human_assistance_optin)) } #[cfg(test)] @@ -177,7 +179,7 @@ mod tests { #[test] fn render_known_language() { let bundle = fixture_bundle(); - let out = render_for(Some("es"), &bundle); + let out = render_for(Some("es"), &bundle).expect("known language must render"); assert!(out.starts_with("Gracias")); assert!(out.contains("asistencia humana")); } @@ -185,14 +187,14 @@ mod tests { #[test] fn render_falls_back_when_language_unknown() { let bundle = fixture_bundle(); - let out = render_for(Some("de"), &bundle); + let out = render_for(Some("de"), &bundle).expect("fallback must render"); assert!(out.starts_with("Thanks for the update")); } #[test] fn render_falls_back_when_language_none() { let bundle = fixture_bundle(); - let out = render_for(None, &bundle); + let out = render_for(None, &bundle).expect("fallback must render"); assert!(out.starts_with("Thanks for the update"));
} @@ -207,13 +209,17 @@ mod tests { } #[test] - fn render_returns_placeholder_when_bundle_lacks_fallback() { + fn render_returns_none_when_bundle_lacks_fallback() { let bundle = SelfResolutionTemplates { by_language: HashMap::new(), fallback_language: "en".into(), }; - let out = render_for(Some("en"), &bundle); - assert!(out.starts_with("[serbero:")); + // Structurally invalid bundle — `entry_for` is None, so the + // renderer returns None rather than emit an operator-facing + // diagnostic into a party's chat. Callers detect None and + // skip the cooperative branch. + assert!(render_for(Some("en"), &bundle).is_none()); + assert!(render_for(None, &bundle).is_none()); } #[test] diff --git a/tests/phase3_self_resolution_template_audit.rs b/tests/phase3_self_resolution_template_audit.rs index cc9fe9f..8465218 100644 --- a/tests/phase3_self_resolution_template_audit.rs +++ b/tests/phase3_self_resolution_template_audit.rs @@ -157,7 +157,9 @@ fn rendered_strings_carry_no_banned_fund_action_keywords() { // covers the exact bytes a party receives. 
for (code, entry) in &bundle.by_language { let banned = banned_langs[code.as_str()]; - let rendered = render_for(Some(code), &bundle).to_ascii_lowercase(); + let rendered = render_for(Some(code), &bundle) + .unwrap_or_else(|| panic!("[{code}] render_for returned None for a present language")) + .to_ascii_lowercase(); for needle in banned { assert!( !rendered.contains(needle), @@ -183,7 +185,8 @@ fn rendered_strings_include_human_assistance_optin_marker() { !entry.human_assistance_optin.trim().is_empty(), "[{code}] human_assistance_optin must be non-empty" ); - let rendered = render_for(Some(code), &bundle); + let rendered = render_for(Some(code), &bundle) + .unwrap_or_else(|| panic!("[{code}] render_for returned None for a present language")); assert!( rendered.contains(&entry.human_assistance_optin), "[{code}] rendered string did not include the configured opt-in sentence" From 804dba09649ca1f44af31ac7fe23b8e1a7d6668b Mon Sep 17 00:00:00 2001 From: grunch Date: Tue, 28 Apr 2026 09:40:28 -0300 Subject: [PATCH 9/9] =?UTF-8?q?review:=20revert=20classified=E2=86=92await?= =?UTF-8?q?ing=5Fresponse=20on=20deliver=5Fsummary=20failure?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verified findings: - self_resolution.rs:134-147 (render_for): already addressed in commit 5110ee6 — `render_for` now returns `Option<String>` and the dispatch caller treats `None` as "feature inert" (logs + returns `Ok(false)` so the cooperative branch is skipped). No further change needed. - follow_up.rs:513-556 (state revert on deliver_summary failure): real bug. The cooperative dispatch arm pre-flips `awaiting_response → classified` so `deliver_summary`'s opening `classified → summary_pending` transition is legal. If `deliver_summary` then returns `Err`, the previous code called `handle_reasoning_failure` and returned without reverting.
The session stays in `classified` forever — the gate at the top of `advance_session_round` only accepts `awaiting_response` or post-invitation `summary_delivered`, so subsequent ticks skip it and the session is non-retryable. Fix: - `models::mediation`: add `Classified → AwaitingResponse` as a legal recovery edge with a comment explaining the use case (a dispatch arm that pre-flipped to `classified` and then saw the summary delivery fail). Test pin in `allowed_transitions_pass`. - `mediation::follow_up::advance_session_round` (cooperative arm): on `deliver_summary` error, acquire the lock and call `db::mediation::set_session_state(..., MediationSessionState::AwaitingResponse, now)` BEFORE invoking `handle_reasoning_failure`. A clock-acquisition or set-state failure logs loudly but does NOT prevent `handle_reasoning_failure` from running, so the consecutive-failure counter still advances and can eventually escalate. Out of scope: the legacy `Summarize` arm has the same shape and the same pre-existing limitation (documented in `mediation/mod.rs:991` as a "Phase 11 limitation"). Touching it would expand this PR's scope; the same fix can land as a separate defensive commit if desired. Tests: all 295 lib + integration tests still pass; clippy clean. --- src/mediation/follow_up.rs | 45 ++++++++++++++++++++++++++++++++++++++ src/models/mediation.rs | 13 +++++++++++ 2 files changed, 58 insertions(+) diff --git a/src/mediation/follow_up.rs b/src/mediation/follow_up.rs index 1011bac..821cc20 100644 --- a/src/mediation/follow_up.rs +++ b/src/mediation/follow_up.rs @@ -570,6 +570,51 @@ pub async fn advance_session_round( error = %e, "advance_session_round: deliver_summary after self-resolution invitation failed" ); + // Revert the pre-flip so the session is retryable + // on the next ingest tick. 
Without this, the + // session sits in `classified` forever — the gate + // at the top of `advance_session_round` only + // accepts `awaiting_response` or + // post-invitation `summary_delivered`. The state + // machine permits `classified → awaiting_response` + // as a recovery edge (see `models::mediation`). + // A failure to revert is logged loudly but not + // bubbled — `handle_reasoning_failure` still runs + // so the consecutive-failure counter advances and + // can eventually escalate. + { + let now = match super::current_ts_secs() { + Ok(t) => t, + Err(ts_err) => { + warn!( + error = %ts_err, + "advance_session_round: clock unavailable; cannot revert state to awaiting_response" + ); + handle_reasoning_failure( + conn, + client, + session_id, + &info.dispute_id, + solvers, + prompt_bundle, + ) + .await; + return Ok(()); + } + }; + let guard = conn.lock().await; + if let Err(rev_err) = db::mediation::set_session_state( + &guard, + session_id, + MediationSessionState::AwaitingResponse, + now, + ) { + warn!( + error = %rev_err, + "advance_session_round: failed to revert classified → awaiting_response after deliver_summary failure" + ); + } + } handle_reasoning_failure( conn, client, diff --git a/src/models/mediation.rs b/src/models/mediation.rs index 6b3ccdd..4fcfa49 100644 --- a/src/models/mediation.rs +++ b/src/models/mediation.rs @@ -39,6 +39,14 @@ impl MediationSessionState { | (Classified, FollowUpPending) | (Classified, SummaryPending) | (FollowUpPending, AwaitingResponse) + // Recovery edge: a dispatch arm that pre-flipped + // `awaiting_response → classified` and then saw + // `deliver_summary` (or its self-resolution + // sibling) fail must be able to revert the + // session so the next ingest tick can retry. Same + // shape as the FollowUpPending → AwaitingResponse + // recovery already permitted. + | (Classified, AwaitingResponse) | (SummaryPending, SummaryDelivered) | (SummaryDelivered, Closed) // Escalation from any non-terminal state. 
@@ -288,6 +296,11 @@ mod tests { // able to lift the session out of `summary_delivered` into // `escalation_recommended` (FR-008). assert!(SummaryDelivered.can_transition_to(EscalationRecommended)); + // Recovery edge: a dispatch arm that pre-flipped to + // `classified` and then saw the summary delivery fail + // reverts the session to `awaiting_response` so the next + // ingest tick can retry. + assert!(Classified.can_transition_to(AwaitingResponse)); assert!(EscalationRecommended.can_transition_to(Closed)); assert!(AwaitingResponse.can_transition_to(SupersededByHuman)); assert!(SupersededByHuman.can_transition_to(Closed));