diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e89544..a4f9335 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Policies under `core/`; relocated `oath-net-core` to `oath-adapter-net-api`; and added `oath-core-api`, `oath-core-kernel`, and the `oath-core`, `oath-strategy-host`, `oath-cli`, and `oath-supervisor` process crates. +- `MockTimer` relocated from `oath-adapter-net-http-mock` into a new dev-only + `oath-adapter-net-mock` crate beside the `Timer` contract in + `oath-adapter-net-api`, so the HTTP and (forthcoming) WebSocket mock stacks + share one fake clock without cross-depending (ADR-0034 §Amendments.4). + `oath-adapter-net-http-mock` now provides only `MockClient`/`MockBody`. ### Added diff --git a/Cargo.lock b/Cargo.lock index a05a462..77ac863 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -305,11 +305,18 @@ dependencies = [ "http", "http-body", "http-body-util", - "oath-adapter-net-api", "oath-adapter-net-http-api", "tokio", ] +[[package]] +name = "oath-adapter-net-mock" +version = "0.1.0" +dependencies = [ + "oath-adapter-net-api", + "tokio", +] + [[package]] name = "oath-adapter-net-ws-api" version = "0.1.0" @@ -543,9 +550,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quote" -version = "1.0.45" +version = "1.0.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +checksum = "dfbc457d0c7a0759a614551b11a6409e5951f6c7537be1f1b7682b9ae9230368" dependencies = [ "proc-macro2", ] @@ -723,9 +730,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.117" +version = "2.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422" dependencies = 
[ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 8c4dd6a..8651497 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ resolver = "3" members = [ "crates/model", "crates/adapter/net/api", + "crates/adapter/net/mock", "crates/adapter/net/http/api", "crates/adapter/net/http/mock", "crates/adapter/net/ws/api", @@ -44,6 +45,7 @@ categories = ["finance", "asynchronous"] # does not treat { workspace = true } path deps as wildcard requirements. oath-model = { path = "crates/model", version = "0.1.0" } oath-adapter-net-api = { path = "crates/adapter/net/api", version = "0.1.0" } +oath-adapter-net-mock = { path = "crates/adapter/net/mock", version = "0.1.0" } oath-adapter-net-http-api = { path = "crates/adapter/net/http/api", version = "0.1.0" } oath-adapter-net-http-mock = { path = "crates/adapter/net/http/mock", version = "0.1.0" } oath-adapter-net-ws-api = { path = "crates/adapter/net/ws/api", version = "0.1.0" } diff --git a/crates/adapter/net/http/mock/Cargo.toml b/crates/adapter/net/http/mock/Cargo.toml index b197bf0..5b4bad9 100644 --- a/crates/adapter/net/http/mock/Cargo.toml +++ b/crates/adapter/net/http/mock/Cargo.toml @@ -9,7 +9,6 @@ license.workspace = true workspace = true [dependencies] -oath-adapter-net-api = { workspace = true } oath-adapter-net-http-api = { workspace = true } http = { workspace = true } bytes = { workspace = true } diff --git a/crates/adapter/net/http/mock/src/lib.rs b/crates/adapter/net/http/mock/src/lib.rs index e2ac4ad..0ad2cb9 100644 --- a/crates/adapter/net/http/mock/src/lib.rs +++ b/crates/adapter/net/http/mock/src/lib.rs @@ -1,15 +1,14 @@ -//! Test harness for the net-http stack: a canned-response `MockClient` leaf, a -//! frame-controllable `MockBody`, and a `MockTimer` virtual clock. Consumed by -//! downstream crates via `[dev-dependencies]` only — it has no production edge. +//! Test harness for the net-http stack: a canned-response `MockClient` leaf and +//! a frame-controllable `MockBody`. 
Consumed by downstream crates via +//! `[dev-dependencies]` only — it has no production edge. (The `MockTimer` +//! virtual clock now lives in the transport-neutral `oath-adapter-net-mock`.) #![forbid(unsafe_code)] pub mod body; pub mod client; -pub mod timer; pub use body::MockBody; pub use client::MockClient; -pub use timer::MockTimer; use std::sync::{Mutex, MutexGuard, PoisonError}; diff --git a/crates/adapter/net/mock/Cargo.toml b/crates/adapter/net/mock/Cargo.toml new file mode 100644 index 0000000..5999c50 --- /dev/null +++ b/crates/adapter/net/mock/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "oath-adapter-net-mock" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true + +[lints] +workspace = true + +[dependencies] +oath-adapter-net-api = { workspace = true } + +[dev-dependencies] +tokio = { workspace = true } diff --git a/crates/adapter/net/mock/src/lib.rs b/crates/adapter/net/mock/src/lib.rs new file mode 100644 index 0000000..8577127 --- /dev/null +++ b/crates/adapter/net/mock/src/lib.rs @@ -0,0 +1,17 @@ +//! Transport-neutral test doubles for the net adapter stack: a `MockTimer` +//! virtual clock beside the `Timer` contract in `oath-adapter-net-api`. Consumed +//! via `[dev-dependencies]` only — it has no production edge, so the HTTP and WS +//! stacks can fake the same clock without dev-depending on each other's mock. +#![forbid(unsafe_code)] + +pub mod timer; + +pub use timer::MockTimer; + +use std::sync::{Mutex, MutexGuard, PoisonError}; + +/// Lock `mutex`, recovering the guard if a panic poisoned it — mock state stays +/// usable so a failing test reports its own assertion, not a poison panic. 
+fn lock<T>(mutex: &Mutex<T>) -> MutexGuard<'_, T> { + mutex.lock().unwrap_or_else(PoisonError::into_inner) +} diff --git a/crates/adapter/net/http/mock/src/timer.rs b/crates/adapter/net/mock/src/timer.rs similarity index 100% rename from crates/adapter/net/http/mock/src/timer.rs rename to crates/adapter/net/mock/src/timer.rs diff --git a/docs/superpowers/plans/2026-07-04-net-mock-extract.md b/docs/superpowers/plans/2026-07-04-net-mock-extract.md new file mode 100644 index 0000000..9b23f7f --- /dev/null +++ b/docs/superpowers/plans/2026-07-04-net-mock-extract.md @@ -0,0 +1,271 @@ +# net-mock extraction (WS resilience PR0) Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Relocate `MockTimer` out of `oath-adapter-net-http-mock` into a new dev-only crate `oath-adapter-net-mock`, beside the `Timer` contract in `oath-adapter-net-api`, so the HTTP and (forthcoming) WebSocket mock stacks share one fake clock without dev-depending on each other's mock crate. + +**Architecture:** A pure relocation — no behavior change. `MockTimer` moves verbatim (via `git mv`, preserving history) into the new crate; `oath-adapter-net-http-mock` keeps only `MockClient`/`MockBody`; the workspace gains one member. `MockTimer` has **no consumers today** (it was built ahead of the HTTP resilience layers), so nothing external needs repointing — the acceptance surface is "both mock crates still build and test green, `just machete` stays green, and the reachability guard holds." + +**Tech Stack:** Rust (edition 2024, MSRV 1.90), `just`, `cargo`. No new external dependencies — the new crate reuses the existing `oath-adapter-net-api` + `tokio` (dev) workspace deps. 
+ +**Source spec:** [docs/superpowers/specs/2026-07-04-net-ws-resilience-design.md](../specs/2026-07-04-net-ws-resilience-design.md) — this is **PR0** of that spec's PR map. Mandated by [ADR-0034](../../adr/0034-http-construction-surface-auth-guarded-boot-coverage.md) §Amendments.4, which relocates `MockTimer` to `oath-adapter-net-mock` expressly because "the WS resilience slice (ADR-0033 §9) is imminent." + +## Global Constraints + +Every task implicitly includes these (from CLAUDE.md, the workspace `[workspace.lints]`, and the spec): + +- **Edition 2024, MSRV 1.90.** No `unsafe` — `unsafe_code = "deny"`; the new crate carries `#![forbid(unsafe_code)]` (as every mock crate does). +- **No `unwrap`/`expect`/indexing/panic in non-test code** — return `Result` / recover (the `lock` poison-recovery helper). Test code is exempt for `unwrap`/`expect`/indexing. +- **`just lint` runs clippy with `-D warnings`** and promotes `pedantic`/`nursery` to errors — all code including tests must be pedantic-clean: `#[must_use]` where clippy asks, document all public items (`missing_docs`), `Debug` on all public types (`missing_debug_implementations`), no unreachable `pub`. +- **Deps** via `[workspace.dependencies]` (internal crates carry an explicit `version`). +- **DoD:** `just ci` green (fmt, lint, test + doctests, doc, deny, typos, machete, …). Update `CHANGELOG.md` `[Unreleased]`. One issue → one branch → worktree under `.claude/worktrees/net-mock-extract` (never switch the primary checkout) → one PR (`Closes #<N>`). + +--- + +## File Structure + +- `crates/adapter/net/mock/Cargo.toml` — **new crate** `oath-adapter-net-mock`. +- `crates/adapter/net/mock/src/lib.rs` — **new.** Crate root: `MockTimer` re-export + the `lock` poison-recovery helper. +- `crates/adapter/net/mock/src/timer.rs` — **moved** verbatim from `crates/adapter/net/http/mock/src/timer.rs`. 
+- `crates/adapter/net/http/mock/src/lib.rs` — **modify.** Drop the `timer` module + `MockTimer` re-export; update the module doc. +- `crates/adapter/net/http/mock/Cargo.toml` — **modify.** Drop the now-unused `oath-adapter-net-api` dependency — it was used **only** by `timer.rs`, so once that file leaves, `just machete` (deny-level) fails until it is removed. +- `Cargo.toml` (workspace) — **modify.** Add the member + the `[workspace.dependencies]` entry. +- `CHANGELOG.md` — **modify.** `[Unreleased] → Changed`. +- **No README change** — the crate table + mermaid graph list only the `*-api` contract crates; the existing dev-only mock crates (`net-http-mock`, `net-ws-mock`) are already absent, so `net-mock` follows that established convention. + +Two tasks: Task 1 is the relocation (one atomic, reviewable unit); Task 2 is the CHANGELOG + guard + gate + PR wrap. + +--- + +## Setup: issue + worktree + +- [ ] **Create the issue** + +```bash +gh issue create \ + --title "refactor(net): extract MockTimer into shared oath-adapter-net-mock crate (WS resilience PR0)" \ + --label enhancement \ + --body "PR0 of the net-ws resilience surface (spec: docs/superpowers/specs/2026-07-04-net-ws-resilience-design.md), mandated by ADR-0034 §Amendments.4. + +Relocate \`MockTimer\` from \`oath-adapter-net-http-mock\` into a new dev-only \`oath-adapter-net-mock\` crate beside the \`Timer\` contract in \`oath-adapter-net-api\`, so the HTTP and WS mock stacks share one fake clock without cross-depending. \`net-http-mock\` keeps only \`MockClient\`/\`MockBody\`." +``` + +Note the issue number `#<N>` for the PR body. + +- [ ] **Confirm the worktree** + +The isolated worktree already exists (created during planning) and holds the committed spec + this plan: + +```bash +git worktree list | grep net-mock-extract +# .../.claude/worktrees/net-mock-extract [refactor/net-mock-extract] +cd .claude/worktrees/net-mock-extract +``` + +All subsequent tasks run inside this worktree. 
(If it is missing, recreate it: `git worktree add .claude/worktrees/net-mock-extract -b refactor/net-mock-extract main`.) + +--- + +## Task 1: Relocate `MockTimer` into `oath-adapter-net-mock` + +**Files:** +- Create: `crates/adapter/net/mock/Cargo.toml`, `crates/adapter/net/mock/src/lib.rs` +- Move: `crates/adapter/net/http/mock/src/timer.rs` → `crates/adapter/net/mock/src/timer.rs` +- Modify: `crates/adapter/net/http/mock/src/lib.rs`, `crates/adapter/net/http/mock/Cargo.toml`, root `Cargo.toml` + +**Interfaces:** +- Consumes: `oath_adapter_net_api::Timer` (the trait `MockTimer` implements); `tokio` (dev, for the moved tests). +- Produces: `oath_adapter_net_mock::MockTimer` — the identical public API as before (`MockTimer::new()`, `Default`, `advance(&self, Duration)`, `impl Timer`), now importable from the transport-neutral crate. PR1 (`net-ws-mock`'s `MockSpawn`) and the future HTTP resilience layers dev-depend on it here. + +- [ ] **Step 1: Register the new crate in the workspace** + +In the root `Cargo.toml`, add the member directly after the `net/api` entry (keeping the net crates grouped): + +```toml + "crates/adapter/net/api", + "crates/adapter/net/mock", +``` + +and the internal-crate dependency directly after the `oath-adapter-net-api` entry in `[workspace.dependencies]`: + +```toml +oath-adapter-net-api = { path = "crates/adapter/net/api", version = "0.1.0" } +oath-adapter-net-mock = { path = "crates/adapter/net/mock", version = "0.1.0" } +``` + +- [ ] **Step 2: Create the new crate's `Cargo.toml`** + +Create `crates/adapter/net/mock/Cargo.toml`: + +```toml +[package] +name = "oath-adapter-net-mock" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true + +[lints] +workspace = true + +[dependencies] +oath-adapter-net-api = { workspace = true } + +[dev-dependencies] +tokio = { workspace = true } +``` + +- [ ] **Step 3: Create the new crate root `src/lib.rs`** + +Create 
`crates/adapter/net/mock/src/lib.rs` (this also creates the `src/` dir the next step's `git mv` targets). It carries its own copy of the `lock` helper, exactly as `net-http-mock` and `net-ws-mock` each do — the moved `timer.rs` calls `crate::lock`: + +```rust +//! Transport-neutral test doubles for the net adapter stack: a `MockTimer` +//! virtual clock beside the `Timer` contract in `oath-adapter-net-api`. Consumed +//! via `[dev-dependencies]` only — it has no production edge, so the HTTP and WS +//! stacks can fake the same clock without dev-depending on each other's mock. +#![forbid(unsafe_code)] + +pub mod timer; + +pub use timer::MockTimer; + +use std::sync::{Mutex, MutexGuard, PoisonError}; + +/// Lock `mutex`, recovering the guard if a panic poisoned it — mock state stays +/// usable so a failing test reports its own assertion, not a poison panic. +fn lock<T>(mutex: &Mutex<T>) -> MutexGuard<'_, T> { + mutex.lock().unwrap_or_else(PoisonError::into_inner) +} +``` + +- [ ] **Step 4: Move `timer.rs` verbatim (preserving history)** + +```bash +git mv crates/adapter/net/http/mock/src/timer.rs crates/adapter/net/mock/src/timer.rs +``` + +Do **not** edit the file's contents — it already imports `crate::lock` and `oath_adapter_net_api::Timer`, both of which resolve identically in the new crate. (The verbatim move is why this is a `git mv`, not a re-paste: no transcription risk, history preserved.) + +- [ ] **Step 5: Drop `MockTimer` from `net-http-mock`** + +Edit `crates/adapter/net/http/mock/src/lib.rs` — remove the `timer` module line, remove the `MockTimer` re-export, and update the module doc. The full new file: + +```rust +//! Test harness for the net-http stack: a canned-response `MockClient` leaf and +//! a frame-controllable `MockBody`. Consumed by downstream crates via +//! `[dev-dependencies]` only — it has no production edge. (The `MockTimer` +//! virtual clock now lives in the transport-neutral `oath-adapter-net-mock`.) 
+#![forbid(unsafe_code)] + +pub mod body; +pub mod client; + +pub use body::MockBody; +pub use client::MockClient; + +use std::sync::{Mutex, MutexGuard, PoisonError}; + +/// Lock `mutex`, recovering the guard if a panic poisoned it — mock state stays +/// usable so a failing test reports its own assertion, not a poison panic. +fn lock<T>(mutex: &Mutex<T>) -> MutexGuard<'_, T> { + mutex.lock().unwrap_or_else(PoisonError::into_inner) +} +``` + +Then edit `crates/adapter/net/http/mock/Cargo.toml` — remove `oath-adapter-net-api` from `[dependencies]`. It was used **only** by the now-moved `timer.rs`, so leaving it fails `just machete` (deny-level). (`lock` stays — `client.rs` still uses it; the `tokio` + `http-body-util` dev-deps stay — `body.rs`/`client.rs` tests still use them.) + +- [ ] **Step 6: Verify both crates build and test green** + +Run: `cargo test -p oath-adapter-net-mock -p oath-adapter-net-http-mock` +Expected: PASS — the moved timer tests (`repeated_poll_does_not_stack_waiters`, `advance_moves_now_and_wakes_sleepers`) run under `oath-adapter-net-mock`; `net-http-mock`'s `MockClient`/`MockBody` tests still pass with no `MockTimer`. + +- [ ] **Step 7: Verify lint + machete are clean** + +Run: `just lint && just machete` +Expected: PASS — no clippy warnings; no unused dependency (in particular `net-http-mock`'s `tokio`/`http-body-util` remain used by `body.rs`/`client.rs`; `net-mock`'s `oath-adapter-net-api` is used by `timer.rs` and `tokio` by its tests). 
+ +- [ ] **Step 8: Commit** + +```bash +git add crates/adapter/net/mock crates/adapter/net/http/mock/src/lib.rs crates/adapter/net/http/mock/Cargo.toml Cargo.toml Cargo.lock +git commit -m "refactor(net): extract MockTimer into shared oath-adapter-net-mock crate" +``` + +--- + +## Task 2: Reachability guard, CHANGELOG, full gate, PR + +**Files:** +- Modify: `CHANGELOG.md` + +- [ ] **Step 1: Assert the dev-only reachability guard** + +Both mock crates must stay unreachable from production code (ADR-0034 §Amendments.4). + +Run: `cargo tree -e no-dev -i oath-adapter-net-mock` +Expected: **no non-dev dependent.** At PR0 nothing depends on `net-mock` yet (its first dev-dependent, `net-ws-mock`, arrives in PR1), so the command prints only the crate itself with no parent lines. If any crate appears as a dependent under `-e no-dev`, that is a production leak — stop and fix before proceeding. + +- [ ] **Step 2: Update the CHANGELOG** + +Add to `CHANGELOG.md` under `## [Unreleased]` → `### Changed` (as the last bullet of that subsection): + +```markdown +- `MockTimer` relocated from `oath-adapter-net-http-mock` into a new dev-only + `oath-adapter-net-mock` crate beside the `Timer` contract in + `oath-adapter-net-api`, so the HTTP and (forthcoming) WebSocket mock stacks + share one fake clock without cross-depending (ADR-0034 §Amendments.4). + `oath-adapter-net-http-mock` now provides only `MockClient`/`MockBody`. +``` + +- [ ] **Step 3: Run the full local gate** + +Run: `just ci` +Expected: green (fmt, lint, test + doctests, doc, deny, typos, machete, …). 
+ +- [ ] **Step 4: Commit, push, PR** + +```bash +git add CHANGELOG.md +git commit -m "docs(changelog): net-mock extraction (WS resilience PR0)" +git push -u origin refactor/net-mock-extract +gh pr create \ + --title "refactor(net): extract MockTimer into shared oath-adapter-net-mock crate (WS resilience PR0)" \ + --body "Closes #<N> + +PR0 of the net-ws resilience surface (spec: docs/superpowers/specs/2026-07-04-net-ws-resilience-design.md), mandated by ADR-0034 §Amendments.4. + +- New dev-only crate \`oath-adapter-net-mock\` holding \`MockTimer\`, beside the \`Timer\` contract in \`oath-adapter-net-api\`. +- \`MockTimer\` moved verbatim (history preserved via \`git mv\`); \`oath-adapter-net-http-mock\` now provides only \`MockClient\`/\`MockBody\`. +- Lets the HTTP and (forthcoming) WS mock stacks share one fake clock without a WS-mock → HTTP-mock cross-dependency. +- No behavior change; \`MockTimer\` had no consumers yet, so nothing external is repointed. Both mock crates keep the \`cargo tree -e no-dev -i\` production-reachability guard. + +This PR also lands the WS resilience design spec (docs/superpowers/specs/2026-07-04-net-ws-resilience-design.md) and this plan. + +🤖 Generated with [Claude Code](https://claude.com/claude-code)" +``` + +Expected: PR open, GitHub Actions CI green (same `just ci` + MSRV job). + +--- + +## Self-Review + +**Spec coverage (PR0 in the spec's PR map):** +- Create `oath-adapter-net-mock`, move `MockTimer` in, `net-http-mock` keeps only `MockClient`/`MockBody` — Task 1. ✅ +- Repoint consumers — **none exist** (`MockTimer` is unconsumed today), so nothing to repoint; acceptance is HTTP-tests-green — Task 1 Step 6. ✅ +- Production-reachability guard (`cargo tree -e no-dev -i`) — Task 2 Step 1. ✅ +- Workspace member + `[workspace.dependencies]` entry — Task 1 Step 1. ✅ +- README — deliberately unchanged (mock crates are absent by convention); noted in File Structure. ✅ +- CHANGELOG + `just ci` + one-issue-one-PR mechanics — Task 2. 
✅ +- Amends ADR-0033 §9 (`MockTimer` home) via ADR-0034 §Amendments.4 — cited in the header + CHANGELOG. ✅ + +**Placeholder scan:** none — every code step carries the actual file content or the exact `git mv`; every run step the exact command + expected result. (`#<N>` is the standard issue-number substitution, filled at Setup.) + +**Type consistency:** `MockTimer`'s public surface (`new`, `Default`, `advance`, `impl Timer`) is unchanged by a verbatim `git mv`; `crate::lock` resolves in `net-mock` because Step 3's `lib.rs` defines it; `oath-adapter-net-api`/`tokio` are the only deps `timer.rs` needs and both are declared in Step 2's `Cargo.toml`. The `net-http-mock` `lib.rs` rewrite in Step 5 keeps `lock`, `body`, `client` — all still used. + +**Known risks to watch during impl:** +- `git mv` requires the destination `src/` dir to exist — Step 3 (creating `lib.rs`) precedes Step 4, so it does. +- If `Cargo.lock` is committed in this repo, Step 8 stages it; if the repo `.gitignore`s it, the `git add Cargo.lock` is a harmless no-op. +- `cargo tree -i` on a crate with zero dependents prints just the crate node — that is the guard passing, not an error. + diff --git a/docs/superpowers/specs/2026-07-04-net-ws-resilience-design.md b/docs/superpowers/specs/2026-07-04-net-ws-resilience-design.md new file mode 100644 index 0000000..409dcbd --- /dev/null +++ b/docs/superpowers/specs/2026-07-04-net-ws-resilience-design.md @@ -0,0 +1,306 @@ +# `net-ws` resilience surface — design + +**Status:** Approved design, pre-implementation. +**Date:** 2026-07-04. +**Crates:** + +- `oath-adapter-net-ws-api` (`crates/adapter/net/ws/api`) — the resilience stack lands here + (reconnect actor, layers, `stack()`), on top of the contract shipped in #65. +- `oath-adapter-net-ws-mock` (`crates/adapter/net/ws/mock`, dev-only) — gains `MockSpawn`. 
+- `oath-adapter-net-mock` (`crates/adapter/net/mock`, **new, dev-only**) — `MockTimer`, extracted + from `net-http-mock` to sit beside the transport-neutral `Timer` contract in `net-api`, so both + the HTTP and WS stacks fake the same clock without dev-depending on each other's crate. +- `oath-adapter-net-ws-tungstenite` (`crates/adapter/net/ws/tungstenite`, **future**) — the real + backend leaf; **roadmapped here, designed in its own spec** (PR6). + +## Context + +[ADR-0032](../../adr/0032-websocket-transport-contract-duplex-frames-lifecycle.md) fixed the +`net-ws-api` **contract** — the untyped duplex frame channel, the asymmetric `Stream`-recv / +RPITIT-send split, the epoch-stamped lifecycle channel, the no-silent-drop backpressure +*guarantee*, the `WsConnector` leaf seam, and the per-transport `AuthSource` — and that contract +**already landed** (`Frame`/`CloseFrame`, `WsError`, `WsSink`/`WsSource`, `Lifecycle`/ +`LifecycleSnapshot`, `WsConnector`, plus the `net-ws-mock` harness) in **PR #65**. + +[ADR-0033](../../adr/0033-websocket-resilience-reconnect-actor-watch-lifecycle.md) then fixed the +**resilience stack that wraps that contract** — the reconnect actor over a new `Spawn` seam, the +two-axis layer stack, the watch-lifecycle *delivery form*, the dual-bound drop-oldest buffer +mechanism, the inverting-but-surviving circuit breaker, the send-axis rate limit, and the +`stack()`/`build()` construction split. **ADR-0033 answers every architectural question.** This +spec does **not** re-decide any of it. + +What ADR-0033 deliberately leaves to implementation — and what this spec closes — is the +**decomposition**: how a stack this size is carved into small, one-issue-one-PR slices that are +each independently mock-testable *before* the pieces they compose exist. 
The governing constraint +(ADR-0033 §9, ADR-0031's rationale) is that the resilience logic lives in the contract crate, not +the backend, precisely so a mock clock + mock spawn + mock leaf can regression-test it; the cut +lines below all serve that testability. + +### Governing ADRs + +- **ADR-0033** — the WebSocket resilience decision record; the **source of truth** for every + behavior sliced below. Section references (e.g. "§7") point into it. +- **ADR-0032** — the WS contract this stack wraps; landed in #65. `WsConnector::connect` (the + composition seam) and the `Lifecycle` watch are unchanged by this spec. +- **ADR-0029** — the runtime-neutral kernel: `Layer`/`ServiceBuilder` (assembly ergonomics only, + ADR-0033 §1), `ErrorKind`/`HasErrorKind` (breaker classification, §7), the `Timer` clock, and + the compile-time `impl`/RPITIT-no-`dyn` binding style. +- **ADR-0003 / ADR-0006** — the adapter anti-corruption boundary: session keepalive, subscription + replay, and loss reconciliation are venue grammar and stay in `oath-adapter-ibkr`, not here. +- **ADR-0004 / ADR-0022** — consume the lifecycle watch (`down_since`/`attempts`/`epoch`) for the + risk-layer trading halt that the inverting breaker relocates the "break" to (§7). +- **ADR-0014** — the Telemetry plane that the deferred lossy edge-feed and `Tracing` route to. +- **[ADR-0034](../../adr/0034-http-construction-surface-auth-guarded-boot-coverage.md)** — the HTTP + construction-surface ADR (landed #66). Two of its decisions bind this spec directly: **§1** is the + authoritative `AuthSource` shape PR1's WS mirror copies; **§Amendments.4** *mandates* the shared + `net-mock` extraction (PR0) and names "the WS resilience slice (ADR-0033 §9) is imminent" as the + time-critical reason — so PR0 is not this spec's invention but an already-recorded ADR decision. 
+- The **net-http construction-surface spec** + ([2026-06-30](2026-06-30-net-http-construction-surface-design.md)) — the sibling pattern this + spec mirrors (decomposition spec + PR map + shared `net-mock` crate). + +## Goal + +Turn ADR-0033's resilience stack into an ordered PR map in which every slice: (a) lands one +issue → one branch → one worktree → one PR under `just ci`; (b) is fully unit- or table-testable +against `MockWsConnector` + `MockTimer` + `MockSpawn` at the moment it lands, with no "untestable +until a later PR" gap; and (c) keeps each highest-consequence behavior — the invert-vs-survive +breaker above all — in a pure, table-tested unit rather than buried in the un-unit-testable actor +loop. The terminal state of this spec is a plan for **PR0**, then successive plans per PR. + +## Scope (in) + +The `net-ws-api` resilience surface and its mock infrastructure — **PRs 0–5**: + +- **`Spawn`** seam (§2) in `net-ws-api`; `MockSpawn` deterministic executor in `net-ws-mock`. +- **`AuthSource` + `NoAuth`** (§8, ADR-0032 §8) in `net-ws-api` — the WS mirror of the HTTP seam, + over the `http::Request<()>` handshake. +- **`WsConfig`** (§9) — non-generic plain data; every knob the layers read. +- **The recv-axis units** (§3/§4/§6): `Heartbeat`/liveness and the dual-bound drop-oldest + `Buffer`. +- **The pure pacing & policy units** (§2/§7/§8): `SendRateLimit` and the extracted + **`ReconnectPolicy`** (classification + backoff + attempt-rate gate). +- **The reconnect actor** (§2/§5/§7): connect-time `Auth`/`ConnectTimeout`, the spawned actor, + epoch/lifecycle writes, `ReconnectingConnection` + `WsControl`, `ReconnectingConnector`. +- **`stack()`** (§9) — assembly over an arbitrary leaf — plus `Tracing` (§3) and the + ordering-invariant regression matrix. +- **The shared `oath-adapter-net-mock` crate (PR0)** — `MockTimer` extracted from `net-http-mock` + so WS can fake the clock without a dev-dep on an HTTP crate. 
+ +## Non-goals (deferred — each its own issue/PR or spec) + +| Deferred item | Why deferred | Lands with | +| --- | --- | --- | +| `net-ws-tungstenite` leaf + `build()` (tokio `Spawn`/`Timer`, tokio-tungstenite + rustls, real-socket tests) | Real-backend I/O + integration concerns deserve their own design pass; the `net-ws-api` surface is fully mock-testable without it (ADR-0033 §9) | **PR6 — its own spec** | +| The **lossy edge-transition feed** (§5 last bullet) | Audit/telemetry only (ADR-0014 plane), explicitly *out* of the safety channel so the safety channel carries no never-drop obligation for the audit log; not needed by risk logic, which keys on the watch's epoch | Telemetry integration / adapter, not this workstream | +| Session keepalive (`tic`, `/tickle`), per-topic `smd` staleness timer + `umd+`/`smd+` refresh, subscription replay on `Resumed`, conservative reconcile-on-`Lagged`, venue sequence-gap detection, the `ErrorKind`→permanent classification *refinement* hook values, and the concrete `WsConfig` values | Venue grammar — the ADR-0003/0006 boundary (ADR-0033 Consequences) | The `oath-adapter-ibkr` slices | +| `max_attempts` voluntary give-up on a non-critical stream (§7) | Orthogonal axis to involuntary permanent failure; not in core `ConnState` | If/when a non-critical stream needs it | + +## Decisions + +### The PR map + +All code lands in `net-ws-api` unless noted. Each PR is one issue → one worktree under +`.claude/worktrees/` → one PR (`Closes #N`), `CHANGELOG.md` `[Unreleased]` updated, `just ci` +green. 
+ +| PR | Contents | Crate(s) | New deps | Testable at landing via | +| --- | --- | --- | --- | --- | +| **PR0** | Extract `MockTimer` → new `oath-adapter-net-mock`; repoint `net-http-mock` | `net-mock` (new), `net-http-mock`, `net-http-api` (dev-dep) | — | existing HTTP tests still green off the moved clock | +| **PR1** | `Spawn` seam; `AuthSource`+`NoAuth`; `WsConfig` plain-data; `MockSpawn` | `net-ws-api`, `net-ws-mock` | (minimal) | inline stubs + `NoAuth` + `MockSpawn` step-pump | +| **PR2** | `Heartbeat` (auto-`Pong`, swallow `Pong`, `Close`→lifecycle, idle→`Stale`, active idle-probe); `Buffer` (dual-bound drop-oldest ring, `total_lagged`) — **recv axis** | `net-ws-api` | `event-listener`, `futures-util` | `MockTimer` + scripted frames + recording pong sink | +| **PR3** | `SendRateLimit` (token bucket on a `WsSink`, **send axis**); `ReconnectPolicy` (classify → `Decision`, capped-exp backoff, attempt-rate gate — **clock-free, table-only**, connection axis) | `net-ws-api` | — | pure/table tests + `MockTimer` (rate limit only) | +| **PR4** | connect-time `Auth`+`ConnectTimeout`; the spawned reconnect actor (owns socket, channel-backed sink, epoch bump, `Resumed`, lifecycle writes, per-(re)connect auth); **drives** `ReconnectPolicy`; composes Heartbeat-socket-side-of-Buffer + a smoke assertion; `ReconnectingConnection`+`WsControl`; `ReconnectingConnector` | `net-ws-api` | — | `MockSpawn` + `MockWsConnector` scripted disconnects/`ErrorKind`s + `MockTimer` | +| **PR5** | `stack(leaf,cfg,timer,auth,spawn) -> impl ReconnectingConnector`; `Tracing` (outermost span, folded here); the full ordering-invariant regression matrix | `net-ws-api` | `tracing` | full mock stack (leaf+clock+executor) | +| **PR6** | `net-ws-tungstenite` leaf + `build()` — **roadmapped, own spec** | `net-ws-tungstenite` (new) | tokio, tokio-tungstenite, rustls | its own spec | + +`async-watch` is **not** a new dep — it landed with the lifecycle channel in #65 
+([`lifecycle.rs`](../../../crates/adapter/net/ws/api/src/lifecycle.rs)). Each PR2/PR5 external dep +maps to exactly one named unit (`event-listener`→`Buffer` wakeups; `futures-util`→stream +processing, promoted from #65's dev-dep to a production dep per ADR-0032; `tracing`→the `Tracing` +layer). + +### PR0 — the shared `net-mock` crate + +**ADR-0034 §Amendments.4 already decided this** — it relocates `MockTimer` into a new dev-only +`oath-adapter-net-mock` (`crates/adapter/net/mock`) beside the `Timer` contract, expressly because +"the WS resilience slice (ADR-0033 §9) is imminent" and the alternative is duplicating `MockTimer` +or dev-depending a *WS* mock on an *HTTP* mock (the nonsense edge across the crate cut). PR0 is that +extraction, executed as the first step of this workstream. It supersedes ADR-0033 §9's original +placement of `MockTimer` in `net-ws-mock`. + +Scope (decisive, per ADR-0034): **move** `MockTimer` out — `net-http-mock` keeps **only** +`MockClient` — into `oath-adapter-net-mock`, add the new member + README graph entry, and repoint +the `net-http-api` / `net-http-mock` tests' dev-dep to it. Acceptance: the existing HTTP tests stay +green off the moved clock, **and** the production-reachability guard holds — `cargo tree -e no-dev +-i oath-adapter-net-mock` shows no non-dev dependents (ADR-0034 §Amendments.4; the same guard +`net-http-mock` and `net-ws-mock` carry). `MockSpawn` stays in `net-ws-mock` (PR1) because it mocks +a `net-ws-api` trait, not the transport-neutral `Timer`. + +### PR1 — seams + mock infra + +- **`Spawn`** (§2): a minimal runtime-neutral seam in `net-ws-api` — the second abstraction + alongside `Timer`, for the one long-lived task (the actor) that outlives any single `call`. Shape + (pinned exactly at the TDD step): + + ```rust + pub trait Spawn: Clone + Send + Sync + 'static { + /// Spawn a detached long-lived task. 
Shutdown is via `WsControl::shutdown` + /// (a channel the task selects on), not an abort handle — so a fire-and-forget + /// return keeps the seam minimal (ADR-0033 §2). + fn spawn(&self, task: impl Future<Output = ()> + Send + 'static); + } + ``` + +- **`AuthSource` + `NoAuth`** (§8; ADR-0032 §8): the WS mirror of the **ADR-0034 §1** HTTP seam, + deliberately the **same seam design** (RPITIT, `Send`-bounded, mutate-in-place, one concrete + transport error), with the two necessary transport differences: it stamps the **`http::Request<()>` + handshake parts** (the WS upgrade is a bodyless GET), and its error is **`WsError`**. Unlike HTTP — + which added `HttpError::Auth` in its PR2 for this seam — **`WsError::Auth` already exists** (landed + with the #65 contract, `→ ErrorKind::Auth`), so PR1 adds **no** new error variant. + + ```rust + pub trait AuthSource: Clone + Send + Sync { + fn authorize(&self, handshake: &mut http::Request<()>) + -> impl Future<Output = Result<(), WsError>> + Send; + } + pub struct NoAuth; // IBKR local gateway holds the session cookie → ready Ok(()) + ``` + + Landed in PR1 as a foundational contract, tested via `NoAuth` (ready-`Ok`, `Send`-assertion, like + HTTP's); **first consumed in PR4** (connect-time `Auth`, re-stamped per (re)connect). + +- **`WsConfig`** (§9): non-generic plain data — connect timeout; backoff (base, cap, factor) + + connection-attempt-rate cap (max attempts / window); buffer bounds (`max_count`, `max_bytes`); + liveness (idle-read timeout, active-ping interval, idle threshold); send-rate (tokens, refill); + permanent-error policy (retries-before-`Unrecoverable`). **No `RateKey`/`K` generic and no + boot-time coverage check** — a WS send limit is per-connection (one pipe), a genuine reduction + vs. the net-http surface (§9). Landed whole; later PRs read subsets. + +- **`MockSpawn`** (net-ws-mock, §9): a test-controlled, single-threaded, manually-pumped executor — + *not* a tokio spawner.
The whole point of the `Spawn` seam: only a deterministic executor lets a + test drive the actor step by step and assert invariants without racing a background task + (`Timer`-style "controllable, not a no-op" applied to spawning). + +### PR2 — recv-axis units + +Built as **independent, standalone units with their own tests** (not buried in the actor loop, per +the deep-module cut that also governs the send/policy units) so both land and are fully tested +*before* the PR4 actor that composes them exists. + +- **`Heartbeat`** (§4): a frame-stream processor that, given the socket source, a `Pong`-capable + sink handle, a `Timer`, a `LifecycleSender`, and the `WsConfig` liveness knobs, yields a + **data-only** frame stream: auto-`Pong` on `Ping` (**mandatory** — Binance drops a socket that + misses it), swallow `Pong`, map `Close` to a lifecycle transition, a passive idle-read timeout → + `Stale`, and an active protocol-`Ping` when idle (*keepalive-when-idle*, since IBKR guarantees no + heartbeat on an idle/unsubscribed socket). It handles **transport liveness only**; session + keepalive is the adapter's (the hard §4 split). Table-tested with `MockTimer` + scripted frames + + a recording pong sink. +- **`Buffer`** (§6): the dual-bound drop-oldest ring — a producer (push data frame; on + `min(count, bytes)` overflow evict oldest, increment `total_lagged`; **never** drop the newest, + so a lone frame larger than the whole byte budget is still admitted) and a consumer (`Stream` of + `Frame`, `event-listener` wakeups — an `mpsc` cannot drop-oldest). Byte-accounting defeats the + IBKR-small-JSON assumption that OOMs on a Coinbase multi-MB level2 snapshot. A standalone data + structure; table-tested for eviction order, both bounds, oversized-frame admission, and the + `total_lagged` count. + +### PR3 — pure pacing & policy units + +The two guard-rail units, grouped because both are pure and table-testable with no socket, spawn, +or actor. 
(If you later want `ReconnectPolicy` on its own slice, it splits cleanly — nothing in +PR3 couples the two.) + +- **`SendRateLimit`** (§8): a token bucket wrapping a `WsSink`; `send()` awaits a token + (`Timer`-driven refill) — *backpressure-inside-`call`*, consistent with ADR-0032 §2 (which + rejected `Sink`'s `poll_ready` handshake, not all backpressure). Default off/generous so IBKR + never notices; configured with the venue inbound cap so a reconnect resubscribe-burst (~100 lines, + well above Binance's ~5/s) cannot flood → disconnect → ban. Table-tested with `MockTimer`. +- **`ReconnectPolicy`** (§2/§7) — the **highest-consequence logic in the stack, kept out of the + actor loop.** Three pure mechanisms behind one unit: + - `classify(kind: ErrorKind, attempts: u64, /* adapter hook */) -> Decision` where + `Decision::{ RetryAfter(Duration), Unrecoverable }`. Transient/unknown (`Connection`/`Timeout`) + → `RetryAfter` **forever** (the "break" relocates to the risk-layer halt via the watch, §7); + permanent (`Auth`, protocol reject) → a few retries then `Unrecoverable` (retrying a permanent + failure worsens the outage → Binance ban). + - capped-exponential `backoff(attempts) -> Duration`. + - the connection-attempt-rate gate: `admit(history, now: Instant) -> Result<(), Duration>` (300 + conns / 5 min / IP) so reconnect itself cannot storm into a ban. + **Clock-free**: `now` is an *input*; the actor owns the `Timer` and feeds it. Pure table tests + cover the full invert-vs-survive truth table with zero async. + +### PR4 — the reconnect actor (the heart) + +- **connect-time `Auth`** (wraps a `WsConnector`, stamps the handshake via `AuthSource` before + `leaf.connect`, re-run per attempt) and **`ConnectTimeout`** (a fresh `Timer`-bounded timeout per + attempt, so a hung handshake cannot wedge the backoff loop — §3's *ConnectTimeout-inside-Reconnect*).
+- **The spawned actor** (§2/§5): owns the single socket; drains a **channel-backed** stable `WsSink` + (the adapter's sends survive a reconnect); on a break, consults `ReconnectPolicy`, rebuilds via + `leaf.connect`, re-injects auth, **bumps the epoch**, emits `Resumed{epoch}`, and writes each + `LifecycleSnapshot` through the `LifecycleSender` (overwrite, never blocks — the actor is never + backpressured by a slow risk consumer). It **drives** `ReconnectPolicy` (it is no longer *the* + policy) and composes **Heartbeat socket-side of Buffer** (control frames handled before the data + ring; auto-`Pong` never queued behind a slow data consumer). +- **`ReconnectingConnection { sink, source, lifecycle, control }`** + **`WsControl { force_reconnect, + shutdown }`** — the *usage* seam (§1), the richer handle produced only at assembly and handed to + the adapter once; the control verbs live **here, not on the sink** (data/control-plane split). + **`ReconnectingConnector`** is the factory trait `stack()`/`build()` return. +- **Ordering smoke assertion:** even though the full invariant matrix is a `stack()` property + (untestable until PR5, §9), PR4 ships at least one behavioral test that the actor it wires + absorbs a control frame (sends `Pong`) and that frame **never** surfaces in the buffered source — + so no one-PR window ships an actor with an unasserted recv order. + +### PR5 — assembly, invariants, and `Tracing` + +- **`stack(leaf, cfg, timer, auth, spawn) -> impl ReconnectingConnector`** (§9): the + canonical assembly over an arbitrary leaf, so the ordering invariants are regression-testable over + a deterministic leaf + clock + executor. +- **`Tracing`** (§3): the outermost span over all reconnects (secret-safe — auth is injected below + it). 
**Folded here, not given a lonely PR2 test**: a span wrapper's only real contract is + *outermost placement*, which is a `stack()` invariant, and asserting span entry needs a + subscriber — set up once, here, where the assembly tests live. +- **The ordering-invariant matrix** over `MockWsConnector` + `MockTimer` + `MockSpawn` (§3): `Auth` + innermost re-stamped per (re)connect (`Reconnect` = the `Retry`-analogue); `ConnectTimeout` inside + `Reconnect`; `Heartbeat` socket-side of `Buffer`; `Tracing` outermost. Plus the cross-cutting + behaviors the full stack now makes testable end-to-end: transient→retry-forever + `down_since`/ + `attempts` climb; permanent→`Unrecoverable`; epoch bump + `Resumed` on reconnect; auto-`Pong` + below a full buffer; `force_reconnect` via the control handle. + +## Resolved implementation open questions + +1. **Slice boundaries** → the PR0–5 map above (fine-grained, mirroring HTTP Slice-0's cadence); + PR6 (leaf) roadmapped to its own spec. +2. **`ReconnectPolicy` extraction** → its own pure unit in PR3 (classification + backoff + + attempt-rate gate), clock-free, table-only — the invert-vs-survive breaker does not live in the + actor loop. +3. **`Tracing` placement** → PR5 (assembly), not a contentless PR2 unit. +4. **`MockTimer` home** → shared `oath-adapter-net-mock` (PR0), extracted from `net-http-mock` per + **ADR-0034 §Amendments.4** (which already recorded this relocation, superseding ADR-0033 §9); + `MockSpawn` stays in `net-ws-mock` (it mocks a `net-ws-api` trait). Both mocks keep the + production-reachability guard. +5. **`AuthSource` placement & shape** → declared in PR1 (foundational, tested via `NoAuth`), + consumed in PR4; the HTTP seam's mirror over `http::Request<()>` / `WsError`. +6. **Lossy edge feed (§5)** → explicitly deferred (ADR-0014 telemetry plane), a decision not a + silent drop. +7. **Actor ordering coverage** → PR4 ships a recv-order smoke assertion; the full matrix is PR5. 
+ +## Consequences + +- **New crate `oath-adapter-net-mock`** (dev-only) holding the shared `MockTimer` per ADR-0034 + §Amendments.4; `net-http-mock` keeps only `MockClient`; `net-ws-mock` gains `MockSpawn`. All + three mock crates remain dev-only — verified by the `cargo tree -e no-dev -i` reachability guard. +- **`net-ws-api` gains** (over #65): `Spawn`, `AuthSource`/`NoAuth`, `WsConfig`, `Heartbeat`, + `Buffer`, `SendRateLimit`, `ReconnectPolicy`, the reconnect actor, `Auth`/`ConnectTimeout`, + `ReconnectingConnection`/`ReconnectingConnector`/`WsControl`, `Tracing`, and `stack()`. New + production deps: `event-listener`, `tracing`, and `futures-util` (promoted from dev). Still + zero-runtime, zero-I/O — no `tokio`/`tokio-tungstenite`/`rustls`. +- **The adapter (`oath-adapter-ibkr`) owns** (unchanged from ADR-0033 Consequences): session + keepalive, the `smd` staleness timer + resubscribe, subscription replay on `Resumed`, the + conservative reconcile-on-`Lagged`, sequence-gap detection, the permanent-classification + refinement, and the concrete `WsConfig` values. +- **Diverges from ADR-0033 §9** in one respect only — `MockTimer` home → shared `net-mock` — and + that divergence is not this spec's: it was already recorded by **ADR-0034 §Amendments.4**, which + this spec follows. Every other ADR-0033 decision is implemented as written. +- **The `net-ws-tungstenite` leaf (PR6)** is the one piece this workstream does not build; the + `net-ws-api` surface it plugs into is complete and mock-verified after PR5. + +