diff --git a/openspec/changes/panic-response-layer.md b/openspec/changes/panic-response-layer.md new file mode 100644 index 00000000..030ab1aa --- /dev/null +++ b/openspec/changes/panic-response-layer.md @@ -0,0 +1,710 @@ +# Panic Response Layer + +## Summary + +Introduces a behavioral destabilization detection and intervention system for EpistemicLease. +Complements staleness tracking (freshness state) with a separate panic score that measures +observable navigation instability. Intervenes via two channels: MCP tool response injection +(existing) and a new PreToolUse hook that fires before every agent tool call — closing the +tunneling blind spot where a destabilized agent stops calling openlore entirely. + +Gryph integration (optional) enriches the panic score with shell/filesystem signals openlore +cannot observe directly. + +--- + +## Design Principles + +**Behavioral only.** Operates exclusively on observable runtime signals: navigation patterns, +trajectory density, oscillation coefficient, stale depth, write volume. No psychological +modeling, no intent inference, no chain-of-thought inspection. + +**Pacing over policing.** Interventions slow destabilizing execution and encourage re-anchoring. +They do not attempt to fix reasoning. + +**Soft-first escalation.** L1→L4 progressive; L4 advisory-only in initial version. + +**Recovery-first.** No permanent punishment states. `orient()` success applies strong score +reduction. All levels self-resolve on behavioral stabilization. + +**Dual-channel.** MCP injection (reaches agents using openlore) + hook injection (reaches all +agents regardless of openlore usage). + +**Fail-open.** Hook absence, read errors, and internal failures MUST NOT break MCP flow or +block agent operation. System correctness MUST NOT depend on hook execution. + +--- + +## Behavioral Space + +Five independent dimensions describe agent state. 
They can be opposed: + +| Situation | Interpretation | +|-----------|---------------| +| low density + low entropy | focused local work (coherent, no intervention needed) | +| high density + high entropy | productive exploration (risky but coherent) | +| high density + low entropy | panic probable (drift, retry loop) | +| low density + high oscillation | local contradiction (A↔B without progress) | +| stale + low panic | deep stale dive (known risk, focused) | +| fresh + high panic | recent orient(), still confused | +| stale + high locality confidence | locally coherent stale work (low risk) | +| stale + low locality confidence + drift | architectural isolation risk | + +**Absence of openlore calls is not a failure signal.** An agent doing focused local work with +high locality confidence has low need for orient() or graph traversal. The working set fits +in active context. Tool utilization is a proxy — the target is *appropriate* tool utilization, +not maximum utilization. + +The dangerous case is not "0 openlore calls" but: +``` +many files + large patches + oscillation + retry loops + cross-module + failure traces +AND no openlore calls +``` +That is architectural isolation risk. Focused single-file work with no orient() is rational. + +## Freshness vs Panic — Explicit Separation + +These are independent dimensions of epistemic state: + +```ts +interface EpistemicState { + freshness: FreshnessState; // architectural authority decay + panic: PanicState; // behavioral destabilization +} +``` + +``` +Freshness models epistemic authority decay. +Panic models behavioral destabilization. +Neither implies the other. + +An agent can be: +- stale but behaviorally calm (linear deep dive into stale context) +- fresh but panicking (rapid confused navigation after recent orient()) +``` + +**Coupling constraint:** Stale depth floors panic level (see Panic Ceiling section). +No other coupling exists. Freshness transitions and panic transitions are computed +independently. 
Metrics, thresholds, and tuning are kept separate. + +--- + +## Runtime Safety Invariants + +The following properties MUST hold regardless of the internal state of the panic system, +the state file, or Gryph availability: + +``` +- panic-check MUST fail open: exits 0, outputs {"decision":"allow"} on any internal error +- Gryph absence MUST have zero behavioral impact: null returned, no error, no log noise +- Telemetry failure MUST NOT affect tool execution: emit() never throws +- panic-state.json corruption MUST resolve to stable state (panicLevel 0, panicScore 0) +- Hook execution failure MUST NOT block MCP flow +- panic-check and openlore telemetry are excluded from panic computation: these CLI + commands read state but never call updateTracker — no recursive feedback loop +``` + +## Formal Invariants + +``` +- panicScore ∈ [0, 100] (always clamped, never drifts) +- staleDepth monotonically increases until orient() +- panicLevel transitions are hysteretic (no thrashing) +- panic-check exits 0 on all code paths including internal failures +- hook absence never breaks MCP flow +- panic-state.json writes are atomic (temp + rename) +- orient() recovery bonus diminishes with rapid repeat usage +- interventionCountSinceStable resets on: stable recovery (panicLevel→0), orient() reset, + 30min session expiry (state treated as expired, all fields zeroed) +``` + +--- + +## Architecture + +``` +openlore MCP server + └── computes panic score on every tool call + └── writes .openlore/panic-state.json atomically (temp + rename) + └── injects panic signals into MCP tool responses (existing channel) + +openlore panic-check CLI + └── reads .openlore/panic-state.json + └── fails open on parse errors / missing file + └── outputs structured response, always exits 0 + +PreToolUse hook (per agent, thin adapter, best-effort) + └── invokes: openlore panic-check --format + └── fires before EVERY tool call — not just openlore calls + └── closes tunneling blind spot + +Gryph (optional, 
gracefully absent) + └── detected via PATH at runtime + └── queried by panic-check when available + └── absence = zero-impact, not error +``` + +--- + +## Shared State File + +`.openlore/panic-state.json` — written by MCP server, read by hook without MCP round-trip. + +```json +{ + "schemaVersion": 1, + "panicScore": 42, + "panicLevel": 2, + "updatedAt": "2026-05-19T10:30:00Z", + "lastOrientAt": "2026-05-19T10:25:00Z", + "lastHookInterventionAt": "2026-05-19T10:29:00Z", + "recentOrientCount": 1, + "localityConfidence": 0.7, + "triggers": ["trajectory_burst", "oscillation_spike"], + "agentId": "claude-code", + "sessionId": "abc123", + "interventionCountSinceStable": 0 +} +``` + +**Writes MUST be atomic:** + +```ts +writeFileSync(`${path}.tmp`, json, 'utf-8'); +renameSync(`${path}.tmp`, path); +``` + +POSIX `rename(2)` is atomic on the same filesystem. Prevents partial reads and race +conditions between MCP server writes and hook reads. + +**Corruption handling:** `panic-check` MUST fail open. Invalid JSON, missing file, or +unreadable state is treated as stable state (panicLevel 0). Hook flow is never interrupted +by state file issues. + +**Session hard reset:** If `updatedAt` is more than 30 minutes in the past, `panic-check` +treats the state as expired: panicScore = 0, panicLevel = 0. Prevents zombie state from +polluting a new session. + +**Schema migration:** Consumers check `schemaVersion` before reading. Unknown versions are +treated as stable state (fail open). + +--- + +## Panic Score + +`panicScore ∈ [0, 100]` — clamped after every operation. 
+ +### MCP-derived signals + +| Signal | Weight | +|--------|--------| +| Trajectory burst (density ≥ 0.60) | +15 | +| Oscillation spike (osc ≥ 0.50) | +10 | +| Stale depth 3 persistence (each call) | +25 | + +### Locality Confidence Modulation + +`localityConfidence ∈ [0,1]` is computed from both density and oscillation: + +``` +localityConfidence = (1 - min(1, density × 2)) × (1 - min(1, oscillation)) +``` + +High localityConfidence = sustained coherent local work. It modulates the panic system: + +| Signal | Gating | +|--------|--------| +| `stale_depth_3` (+25/call) | only fires when `localityConfidence < 0.5` | +| burst escalation (depth → 3) | only fires when `localityConfidence < 0.5` | +| locality recovery (−3/call) | fires when `density < 0.10 && oscillation < 0.10 && staleDepth = 0` | + +**Rationale:** a stale agent doing focused local work (`staleDepth = 3` but `localityConfidence = 0.9`) +is not in the same risk category as a stale agent drifting cross-module. Suppressing the +`stale_depth_3` signal in that case prevents the panic system from treating coherent deep +work as a destabilization event. + +This also means the system does NOT maximize orient() calls. It maximizes appropriate +recontextualization — only when the behavioral signals indicate it is actually needed. + +**Trajectory tracking continues while stale.** Module access window and oscillation score +accumulate during stale state so that post-stale burst and trajectory patterns remain +observable. The stale state does not freeze the behavioral model. + +**Depth-3 persistence intentionally saturates rapidly.** An agent at staleDepth 3 with 4+ +tool calls reaches score 100 within a single burst. This models runaway destabilization — +an agent deep in stale state continuing to make cross-module calls is exhibiting the exact +failure mode the panic layer exists to interrupt. Rapid saturation is a design choice, not +an accidental artifact. 
+ +### Gryph-derived signals (optional) + +| Signal | Weight | Notes | +|--------|--------|-------| +| Large patch while stale (> 500 LOC) | +30 | Write event size — attenuated when commandEntropy is high (see below) | +| Contradiction persistence | +20 | See definition below | +| Repetitive shell retry burst | +15 | See definition below | + +**Raw tool frequency MUST NOT be used directly as a panic signal.** +Only low-entropy repetition patterns are destabilizing. Legitimate activity (builds, tests, +grep, git operations, batch AST traversal) routinely produces high tool frequency. The +signal of interest is behavioral collapse, not throughput. + +`commandEntropy` is normalized Shannon entropy over recent shell command signatures: + +``` +H(commands) = -Σ p(cmd) · log₂(p(cmd)) normalized to [0,1] over max possible entropy +Low entropy = repetitive retry loops (same command repeated, low diversity) +High entropy = exploratory activity (diverse commands, productive burst) +``` + +Low entropy + high frequency = retry burst (panic signal). +High entropy + high frequency = productive exploration (not a panic signal). + +**Contradiction persistence** triggers when: +- Same failure signature (stack trace / test name) repeats N ≥ 3 times +- AND touched module set overlap ≥ 80% between retries (no meaningful trajectory change) +- AND no new module introduced between retries +- NOT triggered by `fail → edit → fail` alone (normal TDD) + +**Repetitive shell retry burst** triggers when: +- High-frequency repeated identical commands OR repeated failing commands +- Low `commandEntropy` over recent window +- NOT triggered by raw command volume + +**Large patch attenuation:** If large patch (> 500 LOC) is accompanied by high `commandEntropy` +(diverse command sequence consistent with deliberate refactoring), weight is reduced from +30 +to +10. High entropy + large patch = likely legitimate boilerplate generation. Low entropy + +large patch = likely panicked patching. 
+ +**Meaningful file trajectory change** is defined as: +- Touched module set overlap < 80% with previous attempt, OR +- At least one new module introduced, OR +- Edit distance of touched file set > 2 + +### Decay + +- **Passive:** `-5 / minute` based on wall-clock elapsed since `updatedAt` +- **orient() success:** recovery bonus (see orient spam protection below) +- **Locality recovery:** `-3 / call` when `density < 0.10 && oscillation < 0.10 && staleDepth = 0` + + Behavioral stabilization is inferred from sustained local navigation with low oscillation + and low trajectory density. The system does not observe intent — it observes the spatial + coherence of tool usage. Concentrated, low-oscillation navigation is treated as evidence + of anchored, productive work. + +Score clamped to `[0, 100]` after every operation. + +### Refractory Period + +After orient() achieves a score reduction (`panicDelta < 0`), upward signals are suppressed +for `PANIC_REFRACTORY_MS` (45 seconds). Locality recovery and passive decay still apply. + +``` +panicRecoverySuppressionUntil = now + 45s (set by orient() on any score-reducing call) +``` + +During the refractory window: +- `trajectory_burst`, `oscillation_spike`, `stale_depth_3` → skipped +- `passive_decay`, `locality_recovery` → still applied + +This prevents panic from immediately re-escalating after recovery. Without it, a single burst +trajectory immediately after orient() would undo the recovery bonus before the agent has had +a chance to re-anchor. The 45s window matches orient() → first few tool calls latency. + +`panicRecoverySuppressionUntil` is stored in the state file (as ISO string, omitted when +not active) so the hook can apply the same guard without re-querying the MCP server. 
+ +### orient() Spam Protection + +`orient()` recovery is diminishing to prevent gaming the reset mechanism: + +| Condition | Recovery bonus | +|-----------|---------------| +| Normal usage | -40 | +| < 2 min since previous orient() | -15 | +| ≥ 3 rapid resets in current session | 0 | + +`recentOrientCount` and `lastOrientAt` tracked in panic state. + +--- + +## Panic Levels + +### Hysteresis Table + +Up and down transitions use different thresholds to prevent thrashing at boundary values: + +| Transition | Condition | +|-----------|-----------| +| L0 → L1 | score ≥ 30 | +| L1 → L0 | score < 20 | +| L1 → L2 | score ≥ 50 | +| L2 → L1 | score < 40 | +| L2 → L3 | score ≥ 70 | +| L3 → L2 | score < 60 | +| L3 → L4 | score ≥ 90 AND stale_depth ≥ 3 | +| L4 → L3 | score < 80 | + +### Panic Ceiling (stale depth floors) + +``` +While staleDepth ≥ 2: minimum panicLevel = 1 +While staleDepth = 3: minimum panicLevel = 2 +``` + +A critically stale agent cannot report Stable behavior. Floors are applied after hysteresis. + +### Summary Table + +| Level | Up threshold | Down threshold | Name | Channel | +|-------|-------------|----------------|------|---------| +| 0 | — | — | Stable | — | +| 1 | ≥ 30 | < 20 | Elevated | MCP + hook | +| 2 | ≥ 50 | < 40 | Panic | MCP + hook | +| 3 | ≥ 70 | < 60 | Scope Reduction | MCP + hook | +| 4 | ≥ 90 + stale3 | < 80 | Critical | hook advisory | + +### Hook Injection Cooldowns + +To prevent context saturation and habituation, hook interventions are rate-limited per level: + +| Level | Cooldown | +|-------|----------| +| L1 | 120s | +| L2 | 60s | +| L3 | 30s | +| L4 | 0s (always fires) | + +`lastHookInterventionAt` in panic state. Cooldown tracked per level. + +**Anti-wallpaper (stateful):** `interventionCountSinceStable` tracked in panic state. 
+When the same level fires ≥ 3 times since last Stable without score improvement, the +intervention mode escalates from advisory to directive: + +``` +// Advisory (first interventions) +[PANIC:PLANNING] Before cross-module modification, state: ... + +// Directive (≥3 repeated, no recovery) +[PANIC:PLANNING:DIRECTIVE] Previous checkpoint ignored. Stop. Run orient() now before proceeding. +``` + +Directive mode resets to advisory on any score reduction. This is V1 implementable — requires +only `interventionCountSinceStable: number` in the state file. + +### Intervention Messages + +**Level 1 — Reflective Checkpoint** +``` +[PANIC:ELEVATED] Recent navigation suggests increasing architectural uncertainty. +Consider: summarize current assumptions, identify uncertain dependencies, call orient(). +``` + +**Level 2 — Planning Enforcement** +``` +[PANIC:PLANNING] Before cross-module modification, state: +1. Intended architectural impact 2. Modules affected 3. Rollback strategy +Then proceed. +``` + +**Level 3 — Scope Reduction** +``` +[PANIC:SCOPE] Cross-module writes discouraged until orient(). +Prefer local changes. orient() expands operational scope. +``` + +**Level 4 — Circuit Breaker (advisory)** +``` +[PANIC:CRITICAL] Critical epistemic instability. Call orient() before further modifications. +``` + +--- + +## New Files + +- `src/core/services/mcp-handlers/panic-response.ts` — panic score computation, state + management, signal detection, atomic state writes. Reads from `EpistemicTracker` (reuses + existing `oscillation`, `density`, `staleDepth` fields). Exports `PanicState`, + `computePanicScore`, `writePanicState`, `applyHysteresis`. + +- `src/cli/commands/panic-check.ts` — `openlore panic-check` CLI command. Reads + `.openlore/panic-state.json` with fail-open semantics. Outputs structured response, + always exits 0. Supports `--format claude|kilo|codex`. Optionally queries Gryph. 
+ +- `openspec/specs/panic-response/spec.md` — domain spec (generated after implementation). + +--- + +## Modified Files + +- `src/core/services/mcp-handlers/epistemic-lease.ts` — extend `EpistemicTracker` with + `panicScore: number`, `panicLevel: 0|1|2|3|4`, `localityConfidence: number`, + `recentOrientCount: number`. Panic computed alongside freshness on every `updateTracker()` + call. Reuses `oscillation`, `density`, `staleDepth` already computed. Explicit separation: + panic computation does not modify freshness fields and vice versa. + +- `src/core/services/mcp-handlers/utils.ts` — add `writePanicState(directory, state)` with + atomic temp+rename semantics. Called from `updateTracker()` after panic recomputation. + +- `src/cli/commands/mcp.ts` — ensure `writePanicState` fires on every tool dispatch. + +- `src/cli/index.ts` — register `panic-check` command. + +- `src/cli/commands/telemetry.ts` — add panic section: episodes, avg recovery latency, + hook intercepts, failed recovery rate. Telemetry reads `panic-response.jsonl`. + +--- + +## Hook Integration + +### Agent Capability Model + +```ts +interface AgentCapabilities { + supportsHooks: boolean; + supportsStructuredIntervention: boolean; + supportsBlockSemantics: boolean; +} +``` + +Capabilities are declared per format. `panic-check --format <agent>` uses the capability +profile for that agent to shape output. Unknown format = fall back to a plain-text warning. + +### openlore panic-check + +``` +openlore panic-check [--directory <path>] [--format claude|kilo|codex] +``` + +**Always exits 0.** Non-zero exit would be misinterpreted as tool crash / hook failure. +Intervention semantics are expressed exclusively through structured output. 
+ +Structured output: + +```json +// L0 — stable +{"decision": "allow"} + +// L1-L3 — warning +{"decision": "warn", "severity": "elevated|panic|scope", "message": "..."} + +// L4 — advisory block +{"decision": "warn", "severity": "critical", "message": "[PANIC:CRITICAL] ..."} +``` + +**L4 uses `warn` + `severity: critical`, not `decision: block`.** Keeps semantics +consistent. Agent adapter MAY escalate `critical` to a block; it is never required to. +This is advisory architecture, not enforcement. + +**L4 enforcement model:** +``` +L4 is advisory by default. +Hook adapters MAY choose stronger semantics (pause/block) depending on runtime capabilities. +OpenLore itself never hard-blocks execution — not in V1, not in V2. +Execution interruption is a runtime policy decision, not a framework decision. +``` + +OpenLore emits signals. Runtimes decide what to do with them. This boundary is intentional: +OpenLore cannot verify that a block is safe or appropriate in context. Enforcement belongs +to the agent runtime that understands its execution model. + +Agent adapters translate `decision` + `severity` to agent-native semantics. + +**Hooks are best-effort runtime augmentations, not trusted enforcement boundaries.** +System correctness MUST NOT depend on hook execution. A hook that never fires must leave +the MCP flow fully functional. + +### Claude Code + +```json +{ + "hooks": { + "PreToolUse": [{ + "matcher": ".*", + "hooks": [{"type": "command", "command": "openlore panic-check --format claude"}] + }] + } +} +``` + +Installed automatically by `openlore setup --hooks claude`. + +### kilocode + +Plugin with `tool.execute.before`. Reads panic state directly from file to avoid CLI +spawn overhead. Interprets `severity: critical` as a throw (advisory block). +Distributed as built-in plugin or separate npm package. + +### Codex + +Identical hook format to Claude Code. Installed by `openlore setup --hooks codex`. 
+ +### Performance + +Process spawn + Node startup + fs read + JSON parse per tool call adds 30–100ms depending +on machine. Acceptable in V1 for sequential tool calls; may cause noticeable stutter if +agent executes 10+ tools in rapid parallel bursts. + +**Critical V1 constraint:** The `panic-check` entry point in `src/cli/index.ts` MUST +short-circuit heavy dependency loading when the invoked command is `panic-check`. DB +drivers, analysis modules, and graph loaders MUST NOT be imported on this path. Only +`panic-state.json` read + JSON parse + output should execute. + +**Hook timeout:** Agent-side hook configuration MUST set a strict execution timeout +(recommended: 200ms). `panic-check` failing to respond within timeout MUST fail open — +tool execution proceeds as if no hook fired. A blocked `panic-check` process MUST NOT +freeze the agent runtime. + +**V2 optimization (not implemented):** `openlore-panicd` — persistent daemon, unix socket, +cached state, sub-millisecond reads. Implement only if V1 latency proves measurable in +practice. Likely to become a priority under daily use. + +--- + +## Gryph Integration (Optional) + +``` +Gryph integration MUST degrade gracefully to zero-impact absence semantics. +``` + +When `gryph` binary is absent or query fails: no signals added, no error, no log noise. + +**Configuration:** + +| Env var | Default | Purpose | +|---------|---------|---------| +| `OPENLORE_GRYPH_TIMEOUT_MS` | `150` | Per-query budget (ms). Applied separately to the exec query and the write query. Set higher on slow machines, lower if hook latency is a concern. Clamped to minimum 50ms. | + +Total Gryph latency budget ≤ `2 × OPENLORE_GRYPH_TIMEOUT_MS`. Add to the agent hook timeout calculation when Gryph is present. + +When present, `panic-check` queries: + +```bash +gryph query --format json --action exec --since <timestamp> +gryph query --format json --action write --since <timestamp> +``` + +Session scoped: matches Gryph session ID from `panic-state.json`. 
+ +Signals consumed: repetitive shell retry bursts (via `commandEntropy`), contradiction +persistence (same failing test + no file trajectory change), large write events while stale. + +--- + +## Telemetry + +Domain: `panic-response.jsonl` + +**Rotation:** rotate at 50MB, keep last 5 files. Prevents unbounded growth from +high-frequency hook activity. + +**Sampling:** High-frequency hook telemetry MAY be sampled. Hook intercept events at L1 +with short cooldowns can be sampled at 10% without losing behavioral signal. + +| Event | Fields | +|-------|--------| +| `panic_elevated` | score, triggers[], agent | +| `reflective_checkpoint` | score, tool_name, channel, panicDelta, source | +| `planning_enforcement` | score, tool_name, channel, panicDelta, source | +| `scope_reduction` | score, tool_name, channel, panicDelta, source | +| `circuit_breaker` | score, stale_depth, channel, panicDelta, source | +| `panic_recovery` | score_before, score_after, via, latency_ms | +| `orient_spam_detected` | recentOrientCount, bonusApplied | + +**Panic provenance trace.** Every `panic_score_delta` event includes full per-trigger +attribution with measured evidence, enabling calibration and false-positive analysis: + +```json +{ + "event": "panic_score_delta", + "tool": "trace_execution_path", + "score_before": 42, + "score_after": 52, + "delta": 10, + "in_refractory": false, + "stale_depth": 1, + "density": 0.67, + "oscillation": 0.34, + "triggers": [ + { "name": "trajectory_burst", "delta": 15, "evidence": { "density": 0.67 } }, + { "name": "passive_decay", "delta": -5, "evidence": { "elapsed_min": 1.0 } } + ] +} +``` + +Separating "trigger" (the signal that fired) from "evidence" (the measured value that +activated it) is required for calibration. Without evidence, the log answers "what fired" +but not "why" — which makes threshold tuning impossible. 
+ +`in_refractory: true` on events where upward signals were suppressed is critical for +detecting over-refractory situations (panic rising despite suppression is evidence that +the threshold is wrong or the window is too short). + +`channel` field: `mcp` or `hook`. + +`openlore telemetry` additions: + +| Metric | Meaning | +|--------|---------| +| panic_episodes | distinct destabilization events (score crossed L1 up-threshold) | +| avg_recovery_latency_ms | time from first L1 to score below L1 down-threshold | +| failed_recovery_rate | episodes where score re-escalated after reaching Stable | +| hook_intercepts | interventions fired via hook (agent not calling openlore) | +| orient_spam_events | orient() calls that received reduced recovery bonus | + +--- + +## Known Limitations + +**Oscillation fragility.** `oscillation` alone is not sufficient. Back-and-forth between +two modules is normal in several productive patterns: + +``` +backend ↔ frontend +interface ↔ implementation +test ↔ fix (TDD) +caller ↔ callee +``` + +The real signal is `oscillation + no convergence`. V1 lacks a convergence signal. This will +produce false positives on legitimate paired workflows. Mitigation: oscillation threshold set +conservatively (0.50), require +density burst for L3+ transitions. V2 should add +convergence tracking (see below). + +**Productive chaos.** A large-scale refactor is behaviorally indistinguishable from a panic +episode: + +- many modules touched +- large writes +- broken builds +- repeated commands +- oscillation between test/impl +- trajectory density spikes + +`commandEntropy` mitigates this partially. High entropy + large patch = attenuated signal. +But monorepo traversal, rename cascades, and API sync are cases where `commandEntropy` stays +high AND trajectory density stays high — false panic guaranteed. V2 needs a "productive +refactor mode" signal (see below). 
+ +**Goal coherence absent.** Current model measures movement, oscillation, and repetition +but not progression. A→B→C→D→E looks identical whether the agent is systematically +working through a refactor or drifting with no coherent goal. Without some notion of +`currentTaskScope` or objective tracking, the model cannot distinguish these. + +**Hook dependency.** The PreToolUse hook must NEVER become mandatory. If the hook is absent, +disabled, or times out, the MCP flow must proceed normally. System correctness must never +depend on hook execution. Runtimes may install the hook for observability; they must not +treat its absence as a failure condition. + +## Non-Goals (initial version) + +- Hard blocking at L4 (advisory only, forever) +- Goal coherence / task scope tracking (V2 — requires agent protocol changes) +- Convergence signals (V2 — needs "new module frontier" and "same error recurrence" tracking) +- Productive refactor mode detection (V2 — expanding module frontier + low contradiction persistence) +- Agents beyond Claude Code, kilocode, Codex +- Gryph as a required dependency +- Psychological modeling, intent classification, prompt inspection +- `openlore-panicd` daemon (V2) +- Adaptive hook reinjection with semantic variation (V2, contract established above) +- Persistent panic state across sessions (each session starts fresh) diff --git a/src/cli/commands/gryph-watch.ts b/src/cli/commands/gryph-watch.ts new file mode 100644 index 00000000..a8ea65b3 --- /dev/null +++ b/src/cli/commands/gryph-watch.ts @@ -0,0 +1,83 @@ +/** + * openlore gryph-watch + * + * Standalone Gryph behavioral observer. Runs as an independent background + * process — lifetime decoupled from the MCP server session. Polls Gryph every + * interval and writes behavioral signals to panic-state.json via CAS writes. + * + * Why a separate process: MCP-path Gryph polling only starts after the first + * openlore tool call. 
Agents working exclusively via Bash/Edit/Read never + * trigger that path. gryph-watch closes this gap by running continuously from + * session start. + * + * Signals provided (standalone, without MCP tracker context): + * repetitiveRetryBurst — low entropy + failing commands (no stale context needed) + * + * Signals requiring MCP tracker (not available here): + * largePatchWhileStale — staleDepth unknown without EpistemicLease session + * + * Install via: openlore setup --hooks claude + * Which installs a UserPromptSubmit hook: openlore gryph-watch & + */ + +import { Command } from 'commander'; +import { existsSync, readFileSync, writeFileSync, unlinkSync } from 'node:fs'; +import { join, dirname } from 'node:path'; +import { OPENLORE_DIR } from '../../constants.js'; +import { readOpenLoreConfig } from '../../core/services/config-manager.js'; +import { startGryphPolling } from '../../core/services/mcp-handlers/gryph-bridge.js'; + +const PID_FILE = 'gryph-watch.pid'; + +function findProjectDirectory(startDir: string): string | null { + let dir = startDir; + for (;;) { + if (existsSync(join(dir, OPENLORE_DIR, 'config.json'))) return dir; + const parent = dirname(dir); + if (parent === dir) return null; + dir = parent; + } +} + +function isProcessAlive(pid: number): boolean { + try { process.kill(pid, 0); return true; } + catch { return false; } +} + +export const gryphWatchCommand = new Command('gryph-watch') + .description('Background Gryph behavioral observer (install via: openlore setup --hooks)') + .argument('[directory]', 'Project directory — auto-detected from cwd if omitted') + .action(async (directoryArg?: string) => { + const directory = directoryArg + ?? findProjectDirectory(process.cwd()) + ?? process.cwd(); + + const cfg = await readOpenLoreConfig(directory); + const mode = cfg?.panicResponse?.mode ?? 
'off'; + if (mode === 'off') process.exit(0); + + // Singleton enforcement: one watcher per directory + const pidPath = join(directory, OPENLORE_DIR, PID_FILE); + if (existsSync(pidPath)) { + try { + const existing = parseInt(readFileSync(pidPath, 'utf-8').trim(), 10); + if (!isNaN(existing) && isProcessAlive(existing)) process.exit(0); + } catch { /* stale PID file — proceed */ } + } + try { writeFileSync(pidPath, String(process.pid), 'utf-8'); } catch { /* non-fatal */ } + + const cleanup = (): void => { + try { unlinkSync(pidPath); } catch { /* ignore */ } + process.exit(0); + }; + process.on('SIGTERM', cleanup); + process.on('SIGINT', cleanup); + // Detect parent process death via stdin EOF (pipe from shell/agent closes) + process.stdin.resume(); + process.stdin.on('close', cleanup); + + // startGryphPolling drives a while loop internally — pending setTimeout keeps + // the process alive. getTracker: () => null is intentional: staleDepth is + // unknown without an active MCP session; largePatchWhileStale is MCP-path-only. 
+ startGryphPolling({ directory, getTracker: () => null }); + }); diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts index ab23c47c..901c43cc 100644 --- a/src/cli/commands/mcp.ts +++ b/src/cli/commands/mcp.ts @@ -31,9 +31,12 @@ import { } from '@modelcontextprotocol/sdk/types.js'; import { sanitizeMcpError, validateDirectory } from '../../core/services/mcp-handlers/utils.js'; -import { createTracker, updateTracker, getFreshnessSignal } from '../../core/services/mcp-handlers/epistemic-lease.js'; +import { createTracker, updateTracker, updatePanic, getFreshnessSignal, trackerToPanicState } from '../../core/services/mcp-handlers/epistemic-lease.js'; import type { EpistemicTracker } from '../../core/services/mcp-handlers/epistemic-lease.js'; +import type { PanicResponseMode } from '../../types/index.js'; +import { readPanicState, writePanicState, getPanicSignalText } from '../../core/services/mcp-handlers/panic-response.js'; import { emit } from '../../core/services/telemetry.js'; +import { readOpenLoreConfig } from '../../core/services/config-manager.js'; import { DEFAULT_DRIFT_MAX_FILES } from '../../constants.js'; import { handleGetCallGraph, @@ -1306,6 +1309,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { // Per-session epistemic lease tracker — re-initialized when directory changes. let tracker: EpistemicTracker | undefined; let trackerDir = ''; + let panicPolicy: PanicResponseMode = 'off'; // --watch-auto: start the watcher on the first tool call that carries a directory let autoWatcher: import('../../core/services/mcp-watcher.js').McpWatcher | undefined; @@ -1354,9 +1358,51 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { if (directory && (!tracker || directory !== trackerDir)) { tracker = createTracker(directory); trackerDir = directory; + const cfg = await readOpenLoreConfig(directory); + panicPolicy = cfg?.panicResponse?.mode ?? 
'off'; + } + // Update epistemic state before dispatch (orient resets tracker internally). + // Invariant: only MCP tool calls (this path) feed panic. CLI commands (panic-check, + // telemetry) are separate processes that read state but never call updateTracker — + // no recursive panic feedback loop from openlore internal commands. + if (tracker && directory) { + const prevOrientResetAt = tracker.lastOrientResetAt; + updateTracker(tracker, name, directory, typeof filePath === 'string' ? filePath : undefined); + const orientJustFired = tracker.lastOrientResetAt !== prevOrientResetAt; + + if (panicPolicy !== 'off') { + // Read disk state to preserve hook-written fields (lastHookInterventionAt, gryphWindowStart) + // that panic-check (separate process) may have set since the last MCP write. + const diskState = readPanicState(directory); + updatePanic(tracker, { + density: tracker.density, + oscillation: tracker.oscillation, + weight: 1, + staleDepth: tracker.staleDepth, + directory, + tool: name, + }); + const stateToWrite = { + ...trackerToPanicState(tracker, agentName), + lastHookInterventionAt: diskState.lastHookInterventionAt, + gryphWindowStart: diskState.gryphWindowStart, + }; + tracker.panicRevision = writePanicState(directory, stateToWrite); + + // Feedback loop: did orient() respond to a prior hook intervention? + if (orientJustFired && diskState.lastHookInterventionAt) { + const lagMs = Date.now() - new Date(diskState.lastHookInterventionAt).getTime(); + if (lagMs < 5 * 60 * 1000) { + emit(directory, 'panic', { + event: 'panic_intervention_outcome', + outcome: 'responded', + intervention_lag_ms: lagMs, + orient_kind: tracker.recentOrientCount >= 3 ? 'spam' : tracker.recentOrientCount >= 2 ? 'rapid' : 'normal', + }); + } + } + } } - // Update epistemic state before dispatch (orient resets tracker internally) - if (tracker && directory) updateTracker(tracker, name, directory, typeof filePath === 'string' ? 
filePath : undefined); let result: unknown; @@ -1540,19 +1586,42 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { }; } - emit(directory, 'mcp', { event: 'tool_call', tool: name, ms: Date.now() - _t0, agent: agentName, agent_version: agentVersion }); + emit(directory, 'mcp', { + event: 'tool_call', tool: name, ms: Date.now() - _t0, agent: agentName, agent_version: agentVersion, + panic_level: tracker?.panicLevel ?? 0, + panic_score: tracker?.panicScore ?? 0, + }); const text = typeof result === 'string' ? result : JSON.stringify(result, null, 2); const signal = tracker ? getFreshnessSignal(tracker) : null; - // Freshness signal is a separate content item — never concatenated into - // the result body — so structured outputs (JSON, patches) are not corrupted. - const content: Array<{ type: 'text'; text: string }> = signal - ? signal.prepend - ? [{ type: 'text', text: signal.text }, { type: 'text', text }] - : [{ type: 'text', text }, { type: 'text', text: signal.text }] - : [{ type: 'text', text }]; + // Both freshness and panic signals are separate content items — never + // concatenated into the result body — so structured outputs (JSON, patches) + // are not corrupted. Panic signal always appended (after result). 
+ const content: Array<{ type: 'text'; text: string }> = []; + if (signal?.prepend) content.push({ type: 'text', text: signal.text }); + content.push({ type: 'text', text }); + if (signal && !signal.prepend) content.push({ type: 'text', text: signal.text }); + + if (tracker && (panicPolicy === 'advisory' || panicPolicy === 'experimental_blocking')) { + const panicState = trackerToPanicState(tracker, agentName); + const panicText = getPanicSignalText(panicState); + if (panicText) { + content.push({ type: 'text', text: panicText }); + tracker.interventionCountSinceStable++; + tracker.panicRevision = writePanicState(directory, trackerToPanicState(tracker, agentName)); + emit(directory, 'panic', { + event: 'panic_signal_injected', + panic_level: tracker.panicLevel, + panic_score: tracker.panicScore, + intervention_count: tracker.interventionCountSinceStable, + directive_mode: tracker.interventionCountSinceStable >= 3, + tool: name, + agent: agentName, + }); + } + } return { content }; } catch (err) { diff --git a/src/cli/commands/panic-check.ts b/src/cli/commands/panic-check.ts new file mode 100644 index 00000000..d0dad572 --- /dev/null +++ b/src/cli/commands/panic-check.ts @@ -0,0 +1,108 @@ +/** + * openlore panic-check + * + * Reads panic-state.json and outputs a structured JSON decision for the + * Claude Code PreToolUse hook. Always exits 0 — severity is encoded in + * the payload, not the exit code, so the hook runtime never sees an error. + * + * Designed for minimal startup overhead: imports only node built-ins and + * constants. Heavy MCP dependencies are never loaded. 
+ */ + +import { Command } from 'commander'; +import { readPanicState, writePanicState, buildPanicCheckOutput } from '../../core/services/mcp-handlers/panic-response.js'; +import { queryGryphSignals, applyGryphDelta } from '../../core/services/mcp-handlers/gryph-bridge.js'; +import { readOpenLoreConfig } from '../../core/services/config-manager.js'; +import { emit } from '../../core/services/telemetry.js'; + +type HookFormat = 'claude' | 'kilo' | 'codex'; + +export const panicCheckCommand = new Command('panic-check') + .description('Check current panic level (PreToolUse hook consumer)') + .option('-d, --directory ', 'Project directory', process.cwd()) + .option('-f, --format ', 'Hook format: claude|kilo|codex', 'claude') + .action(async (options: { directory: string; format: string }) => { + try { + const dir = options.directory; + const format = options.format as HookFormat; + + // Policy gate — config is single source of truth + const cfg = await readOpenLoreConfig(dir); + const mode = cfg?.panicResponse?.mode ?? 'off'; + + if (mode === 'off' || mode === 'observe') { + // Panic disabled or observe-only: hook passes through silently + process.exit(0); + } + + let state = readPanicState(dir); + + // Gryph enrichment — query from gryphWindowStart (2-min fallback avoids replaying hours of history) + const since = state.gryphWindowStart ?? 
new Date(Date.now() - 2 * 60 * 1000).toISOString(); + const gryphSignals = queryGryphSignals(since); + if (gryphSignals) { + const enrichedTriggers = [...state.triggers]; + const enrichedScore = applyGryphDelta( + state.panicScore, + gryphSignals, + state.panicLevel >= 2, // isStale when at L2+ + enrichedTriggers, + ); + if (enrichedScore !== state.panicScore) { + state = { + ...state, + panicScore: enrichedScore, + triggers: enrichedTriggers, + }; + } + } + + const output = buildPanicCheckOutput(state); + + if (output.decision === 'warn') { + const newCount = state.interventionCountSinceStable + 1; + const now = new Date().toISOString(); + writePanicState(dir, { + ...state, + lastHookInterventionAt: now, + gryphWindowStart: now, + interventionCountSinceStable: newCount, + }); + emit(dir, 'panic', { + event: 'hook_intervention', + channel: 'pre_tool_use', + format, + panic_level: state.panicLevel, + severity: output.severity, + directive_mode: newCount >= 3, + intervention_count: newCount, + gryph_enriched: gryphSignals !== null, + }); + } + + // experimental_blocking: emit block signal at L4 — runtime decides enforcement. + // advisory:true is explicit in the payload: OpenLore recommends, never mandates. + // OpenLore always exits 0. 
+ if (mode === 'experimental_blocking' && state.panicLevel >= 4) { + const blockOutput = { decision: 'block' as const, advisory: true, panicLevel: state.panicLevel, message: output.message }; + process.stdout.write(JSON.stringify(blockOutput) + '\n'); + process.exit(0); + } + + process.stdout.write(formatOutput(output, format) + '\n'); + } catch { + // fail-open: any error → silent exit 0 + } + process.exit(0); + }); + +function formatOutput(output: ReturnType, format: HookFormat): string { + // claude and codex both consume raw JSON — codex uses the same Claude Code hook schema + if (format === 'claude' || format === 'codex') { + return JSON.stringify(output); + } + + // kilo: plain-text message (some runtimes just want a string signal) + if (output.decision === 'allow') return ''; + return output.message ?? `[PANIC:${output.severity?.toUpperCase() ?? 'WARN'}] Destabilization detected — call orient().`; +} diff --git a/src/cli/commands/panic-level.ts b/src/cli/commands/panic-level.ts new file mode 100644 index 00000000..31ac8ae4 --- /dev/null +++ b/src/cli/commands/panic-level.ts @@ -0,0 +1,27 @@ +/** + * openlore panic-level + * + * Read-only status line output: current panic level as a compact string. + * No side effects, no writes — safe to call from a status line poller. + * + * Output: "P:L{n}" at L1–L4, empty string at L0. + * Exit: always 0. 
+ */ + +import { Command } from 'commander'; +import { readPanicState } from '../../core/services/mcp-handlers/panic-response.js'; + +export const panicLevelCommand = new Command('panic-level') + .description('Output current panic level for status line display (read-only, exits 0)') + .option('-d, --directory ', 'Project directory', process.cwd()) + .action((options: { directory: string }) => { + try { + const state = readPanicState(options.directory); + if (state.panicLevel > 0) { + process.stdout.write(`P:L${state.panicLevel}`); + } + } catch { + // fail-open: output nothing + } + process.exit(0); + }); diff --git a/src/cli/commands/setup.ts b/src/cli/commands/setup.ts index 46bb6cd3..bca20796 100644 --- a/src/cli/commands/setup.ts +++ b/src/cli/commands/setup.ts @@ -23,6 +23,87 @@ import { fileURLToPath } from 'node:url'; import { checkbox } from '@inquirer/prompts'; import { logger } from '../../utils/logger.js'; import { installPreCommitHook, installClaudeHook } from './decisions.js'; +import { readOpenLoreConfig, writeOpenLoreConfig } from '../../core/services/config-manager.js'; +import type { PanicResponseMode } from '../../types/index.js'; + +// ============================================================================ +// PANIC CHECK HOOK +// Installs openlore panic-check as a PreToolUse hook in .claude/settings.json. 
+// ============================================================================ + +const PANIC_CHECK_HOOK_MARKER = 'openlore panic-check'; + +interface ClaudeHookSettings { + hooks?: { + PreToolUse?: Array<{ _comment?: string; [key: string]: unknown }>; + PostToolUse?: Array<{ _comment?: string; [key: string]: unknown }>; + UserPromptSubmit?: Array<{ _comment?: string; [key: string]: unknown }>; + [key: string]: unknown; + }; + [key: string]: unknown; +} + +export async function installPanicCheckHook(rootPath: string, format: string = 'claude'): Promise { + const settingsPath = join(rootPath, '.claude', 'settings.json'); + let settings: ClaudeHookSettings = {}; + + try { + settings = JSON.parse(await readFile(settingsPath, 'utf-8')) as ClaudeHookSettings; + } catch { /* file missing or corrupt — start fresh */ } + + const hooks = settings.hooks?.PreToolUse ?? []; + if (hooks.some((h) => JSON.stringify(h).includes(PANIC_CHECK_HOOK_MARKER))) { + logger.success('panic-check PreToolUse hook already present in .claude/settings.json'); + return; + } + + const hookEntry = { + _comment: 'openlore: behavioral destabilization guard — fires before every tool call', + type: 'command', + command: `openlore panic-check --directory "$(pwd)" --format ${format}`, + }; + + settings.hooks ??= {}; + settings.hooks.PreToolUse = [...hooks, hookEntry]; + + await mkdir(join(rootPath, '.claude'), { recursive: true }); + await writeFile(settingsPath, JSON.stringify(settings, null, 2) + '\n', 'utf-8'); + logger.success(`panic-check PreToolUse hook added to .claude/settings.json (format: ${format})`); +} + +// ============================================================================ +// GRYPH WATCH HOOK +// Installs openlore gryph-watch as a UserPromptSubmit hook — starts the +// background Gryph observer once per session, decoupled from MCP tool calls. 
+// ============================================================================ + +const GRYPH_WATCH_HOOK_MARKER = 'openlore gryph-watch'; + +export async function installGryphWatchHook(rootPath: string): Promise { + const settingsPath = join(rootPath, '.claude', 'settings.json'); + let settings: ClaudeHookSettings = {}; + try { + settings = JSON.parse(await readFile(settingsPath, 'utf-8')) as ClaudeHookSettings; + } catch { /* start fresh */ } + + const hooks = settings.hooks?.UserPromptSubmit ?? []; + if (hooks.some((h) => JSON.stringify(h).includes(GRYPH_WATCH_HOOK_MARKER))) { + logger.success('gryph-watch UserPromptSubmit hook already present in .claude/settings.json'); + return; + } + + const hookEntry = { + _comment: 'openlore: start Gryph behavioral observer (singleton, background)', + type: 'command', + command: 'openlore gryph-watch &', + }; + settings.hooks ??= {}; + settings.hooks.UserPromptSubmit = [...hooks, hookEntry]; + + await mkdir(join(rootPath, '.claude'), { recursive: true }); + await writeFile(settingsPath, JSON.stringify(settings, null, 2) + '\n', 'utf-8'); + logger.success('gryph-watch UserPromptSubmit hook added to .claude/settings.json'); +} // ============================================================================ // TYPES @@ -280,10 +361,47 @@ export const setupCommand = new Command('setup') false ) .option('--dir ', 'Project root directory', process.cwd()) - .action(async (options: { tools?: string; force: boolean; dir: string }) => { + .option( + '--hooks ', + 'Install PreToolUse panic-check hook for the given agent format: claude|kilo|codex' + ) + .option( + '--panic ', + 'Set panic response mode in .openlore/config.json: off|observe|advisory|experimental_blocking' + ) + .action(async (options: { tools?: string; force: boolean; dir: string; hooks?: string; panic?: string }) => { const projectRoot = options.dir; const allTools: ToolName[] = ['vibe', 'cline', 'gsd', 'bmad', 'claude', 'opencode', 'omoa']; + // If only flag options 
(no tool install needed), run them and exit early + if (!options.tools && (options.hooks || options.panic) && !process.stdout.isTTY) { + if (options.hooks) { + const validFormats = ['claude', 'kilo', 'codex']; + const fmt = validFormats.includes(options.hooks) ? options.hooks : 'claude'; + if (!validFormats.includes(options.hooks)) { + logger.warning(`Unknown hooks format "${options.hooks}" — defaulting to "claude"`); + } + await installPanicCheckHook(projectRoot, fmt); + await installGryphWatchHook(projectRoot); + } + if (options.panic !== undefined) { + const validModes: PanicResponseMode[] = ['off', 'observe', 'advisory', 'experimental_blocking']; + if (!validModes.includes(options.panic as PanicResponseMode)) { + logger.error(`Unknown panic mode "${options.panic}". Valid: ${validModes.join(', ')}`); + } else { + const cfg = await readOpenLoreConfig(projectRoot); + if (!cfg) { + logger.warning('No .openlore/config.json found — run openlore init first.'); + } else { + cfg.panicResponse = { mode: options.panic as PanicResponseMode }; + await writeOpenLoreConfig(projectRoot, cfg); + logger.success(`panic response mode set to "${options.panic}"`); + } + } + } + process.exit(0); + } + let tools: ToolName[]; if (options.tools) { tools = (options.tools.split(',').map((t) => t.trim()) as ToolName[]).filter((t) => @@ -365,6 +483,35 @@ export const setupCommand = new Command('setup') await installClaudeHook(projectRoot); } + // --hooks flag: install panic-check PreToolUse hook independently of --tools + if (options.hooks) { + const validFormats = ['claude', 'kilo', 'codex']; + const fmt = validFormats.includes(options.hooks) ? 
options.hooks : 'claude'; + if (!validFormats.includes(options.hooks)) { + logger.warning(`Unknown hooks format "${options.hooks}" — defaulting to "claude"`); + } + await installPanicCheckHook(projectRoot, fmt); + await installGryphWatchHook(projectRoot); + } + + // --panic flag: update panicResponse.mode in .openlore/config.json + if (options.panic !== undefined) { + const validModes: PanicResponseMode[] = ['off', 'observe', 'advisory', 'experimental_blocking']; + if (!validModes.includes(options.panic as PanicResponseMode)) { + logger.error(`Unknown panic mode "${options.panic}". Valid: ${validModes.join(', ')}`); + } else { + const mode = options.panic as PanicResponseMode; + const cfg = await readOpenLoreConfig(projectRoot); + if (!cfg) { + logger.warning('No .openlore/config.json found — run openlore init first.'); + } else { + cfg.panicResponse = { mode }; + await writeOpenLoreConfig(projectRoot, cfg); + logger.success(`panic response mode set to "${mode}"`); + } + } + } + // ── Report ─────────────────────────────────────────────────────────────── const byTool: Record = {}; for (const r of results) { diff --git a/src/cli/commands/telemetry.test.ts b/src/cli/commands/telemetry.test.ts new file mode 100644 index 00000000..74aa7535 --- /dev/null +++ b/src/cli/commands/telemetry.test.ts @@ -0,0 +1,325 @@ +/** + * Validates panic/lease telemetry metric aggregation with synthetic JSONL events. + * Tests computePanicStats, computeRecovery, computeObstinacy directly. 
+ */ + +import { describe, it, expect } from 'vitest'; +import { computePanicStats, computeRecovery, computeObstinacy } from './telemetry.js'; +import type { PanicEvent, LeaseEvent, McpEvent } from './telemetry.js'; + +// ── helpers ────────────────────────────────────────────────────────────────── + +function ts(offsetMs: number = 0): string { + return new Date(1_700_000_000_000 + offsetMs).toISOString(); +} + +function levelChange(from: number, to: number, offsetMs: number, extra?: Partial): PanicEvent { + return { ts: ts(offsetMs), event: 'panic_level_change', from_level: from, to_level: to, ...extra }; +} + +function orientReset(kind: 'normal' | 'rapid' | 'spam', offsetMs: number): PanicEvent { + return { ts: ts(offsetMs), event: 'panic_orient_reset', orient_kind: kind }; +} + +function hookIntervention(offsetMs: number, gryph = false, count = 1): PanicEvent { + return { ts: ts(offsetMs), event: 'hook_intervention', intervention_count: count, gryph_enriched: gryph }; +} + +function injection(offsetMs: number): PanicEvent { + return { ts: ts(offsetMs), event: 'panic_signal_injected' }; +} + +// ── computePanicStats ──────────────────────────────────────────────────────── + +describe('computePanicStats', () => { + it('returns zeros on empty input', () => { + const r = computePanicStats([]); + expect(r.panic_episodes).toBe(0); + expect(r.avg_recovery_ms).toBeNull(); + expect(r.failed_recovery_rate).toBe('—'); + expect(r.hook_intercepts).toBe(0); + expect(r.mcp_injections).toBe(0); + expect(r.orient_spam_events).toBe(0); + expect(r.orient_rapid_events).toBe(0); + expect(r.gryph_enriched_intercepts).toBe(0); + expect(r.trigger_counts).toHaveLength(0); + }); + + it('counts a completed episode (0→L2→0)', () => { + const events: PanicEvent[] = [ + levelChange(0, 2, 0), + levelChange(2, 0, 60_000), + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(1); + expect(r.avg_recovery_ms).toBe(60_000); + expect(r.failed_recovery_rate).toBe('0/1'); + }); 
+ + it('measures avg recovery latency over multiple completed episodes', () => { + const events: PanicEvent[] = [ + levelChange(0, 1, 0), + levelChange(1, 0, 30_000), // 30s episode + levelChange(0, 3, 100_000), + levelChange(3, 0, 190_000), // 90s episode + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(2); + expect(r.avg_recovery_ms).toBe(60_000); // (30000 + 90000) / 2 + }); + + it('tracks peak level within an episode', () => { + // level escalates within episode + const events: PanicEvent[] = [ + levelChange(0, 1, 0), + levelChange(1, 3, 10_000), // escalation mid-episode + levelChange(3, 0, 60_000), + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(1); + expect(r.failed_recovery_rate).toBe('0/1'); + }); + + it('counts failed recovery: episode with no return to L0', () => { + const events: PanicEvent[] = [ + levelChange(0, 2, 0), + // no return to 0 + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(1); + expect(r.failed_recovery_rate).toBe('1/1'); + expect(r.avg_recovery_ms).toBeNull(); // no completed episodes + }); + + it('mixed: 1 completed + 1 failed → correct rate and avg', () => { + const events: PanicEvent[] = [ + levelChange(0, 2, 0), + levelChange(2, 0, 45_000), // completed: 45s + levelChange(0, 3, 200_000), // new episode, never closes + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(2); + expect(r.failed_recovery_rate).toBe('1/2'); + expect(r.avg_recovery_ms).toBe(45_000); // only completed episode + }); + + it('counts hook intercepts and mcp injections', () => { + const events: PanicEvent[] = [ + hookIntervention(0), + hookIntervention(5_000), + injection(10_000), + injection(15_000), + injection(20_000), + ]; + const r = computePanicStats(events); + expect(r.hook_intercepts).toBe(2); + expect(r.mcp_injections).toBe(3); + }); + + it('counts orient spam and rapid events', () => { + const events: PanicEvent[] = [ + orientReset('normal', 0), + 
orientReset('rapid', 30_000), + orientReset('rapid', 60_000), + orientReset('spam', 90_000), + orientReset('spam', 120_000), + ]; + const r = computePanicStats(events); + expect(r.orient_spam_events).toBe(2); + expect(r.orient_rapid_events).toBe(2); + }); + + it('counts gryph-enriched hook intercepts', () => { + const events: PanicEvent[] = [ + hookIntervention(0, false), + hookIntervention(5_000, true), + hookIntervention(10_000, true), + ]; + const r = computePanicStats(events); + expect(r.hook_intercepts).toBe(3); + expect(r.gryph_enriched_intercepts).toBe(2); + }); + + it('aggregates trigger frequency from call_triggers', () => { + const events: PanicEvent[] = [ + { ts: ts(0), event: 'hook_intervention', call_triggers: ['trajectory_burst', 'oscillation_spike'] }, + { ts: ts(5_000), event: 'hook_intervention', call_triggers: ['trajectory_burst'] }, + { ts: ts(10_000), event: 'hook_intervention', call_triggers: ['stale_depth_3'] }, + ]; + const r = computePanicStats(events); + const tmap = new Map(r.trigger_counts); + expect(tmap.get('trajectory_burst')).toBe(2); + expect(tmap.get('oscillation_spike')).toBe(1); + expect(tmap.get('stale_depth_3')).toBe(1); + // sorted descending by count + expect(r.trigger_counts[0][0]).toBe('trajectory_burst'); + }); + + it('handles level changes that arrive out of chronological order', () => { + // sort should handle this + const events: PanicEvent[] = [ + levelChange(2, 0, 60_000), // end of episode (arrives first in array) + levelChange(0, 2, 0), // start + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(1); + expect(r.avg_recovery_ms).toBe(60_000); + expect(r.failed_recovery_rate).toBe('0/1'); + }); + + it('ignores non-level-change events for episode tracking', () => { + const events: PanicEvent[] = [ + hookIntervention(0), + orientReset('normal', 10_000), + injection(20_000), + // no level changes → no episodes + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(0); + 
expect(r.avg_recovery_ms).toBeNull(); + }); +}); + +// ── computeRecovery ─────────────────────────────────────────────────────────── + +describe('computeRecovery', () => { + function staleEvent(offsetMs: number): LeaseEvent { + return { ts: ts(offsetMs), event: 'stale', depth: 1 }; + } + function orientReset(offsetMs: number): LeaseEvent { + return { ts: ts(offsetMs), event: 'orient_reset', prior_load: 0, prior_depth: 1 }; + } + function orientCall(offsetMs: number): McpEvent { + return { ts: ts(offsetMs), event: 'tool_call', tool: 'orient', ms: 50 }; + } + function degraded(offsetMs: number): LeaseEvent { + return { ts: ts(offsetMs), event: 'degraded' }; + } + + it('computes avg stale→orient latency', () => { + const lease: LeaseEvent[] = [staleEvent(0)]; + const mcp: McpEvent[] = [orientCall(45_000)]; + const r = computeRecovery(mcp, lease); + expect(r.avg_recovery_ms).toBe(45_000); + expect(r.stale_events).toBe(1); + expect(r.orient_calls).toBe(1); + }); + + it('averages latency over multiple stale→orient pairs', () => { + const lease: LeaseEvent[] = [staleEvent(0), staleEvent(100_000)]; + const mcp: McpEvent[] = [orientCall(60_000), orientCall(130_000)]; + const r = computeRecovery(mcp, lease); + expect(r.avg_recovery_ms).toBe(45_000); // (60000 + 30000) / 2 + }); + + it('returns null avg when no stale event has a subsequent orient', () => { + const lease: LeaseEvent[] = [staleEvent(100_000)]; + const mcp: McpEvent[] = [orientCall(0)]; // orient before stale + const r = computeRecovery(mcp, lease); + expect(r.avg_recovery_ms).toBeNull(); + }); + + it('computes recovery half-life (orient_reset → next degradation)', () => { + const lease: LeaseEvent[] = [orientReset(0), degraded(90_000)]; + const mcp: McpEvent[] = []; + const r = computeRecovery(mcp, lease); + expect(r.avg_stable_after_orient_ms).toBe(90_000); + }); + + it('returns null half-life when no degradation follows reset', () => { + const lease: LeaseEvent[] = [orientReset(0)]; + const mcp: McpEvent[] = 
[]; + const r = computeRecovery(mcp, lease); + expect(r.avg_stable_after_orient_ms).toBeNull(); + }); + + it('computes correct recurrence rate', () => { + const lease: LeaseEvent[] = [staleEvent(0), staleEvent(200_000)]; + const mcp: McpEvent[] = [orientCall(100_000)]; + const r = computeRecovery(mcp, lease); + expect(r.stale_events).toBe(2); + expect(r.orient_calls).toBe(1); + expect(r.recurrence_rate).toBe('2.00 stale/orient'); + }); + + it('returns — for recurrence rate when no orients', () => { + const lease: LeaseEvent[] = [staleEvent(0)]; + const mcp: McpEvent[] = []; + const r = computeRecovery(mcp, lease); + expect(r.recurrence_rate).toBe('—'); + }); +}); + +// ── computeObstinacy ───────────────────────────────────────────────────────── + +describe('computeObstinacy', () => { + function staleEvent(depth: number, offsetMs: number): LeaseEvent { + return { ts: ts(offsetMs), event: 'stale', depth }; + } + function orientResetEvent(offsetMs: number): LeaseEvent { + return { ts: ts(offsetMs), event: 'orient_reset' }; + } + function toolCall(name: string, offsetMs: number): McpEvent { + return { ts: ts(offsetMs), event: 'tool_call', tool: name, ms: 10 }; + } + + it('returns zeros on empty input', () => { + const r = computeObstinacy([], []); + expect(r.total_stale_episodes).toBe(0); + expect(r.avg_calls_before_orient).toBe('—'); + }); + + it('counts tool calls between stale and orient_reset', () => { + const lease: LeaseEvent[] = [staleEvent(1, 0), orientResetEvent(50_000)]; + const mcp: McpEvent[] = [ + toolCall('search_code', 10_000), + toolCall('get_subgraph', 20_000), + toolCall('orient', 50_000), // orient itself, counts as orient kind + ]; + const r = computeObstinacy(mcp, lease); + expect(r.total_stale_episodes).toBe(1); + // 2 non-orient tool calls before orient_reset + expect(r.episodes[0].calls_before_orient).toBe(2); + }); + + it('tracks max depth within episode', () => { + const lease: LeaseEvent[] = [ + staleEvent(1, 0), + staleEvent(2, 10_000), // 
depth escalation mid-episode + orientResetEvent(60_000), + ]; + const mcp: McpEvent[] = []; + const r = computeObstinacy(mcp, lease); + expect(r.total_stale_episodes).toBe(1); + expect(r.episodes[0].depth).toBe(2); + }); + + it('counts open episode (no orient at end) as last segment', () => { + const lease: LeaseEvent[] = [staleEvent(1, 0)]; + const mcp: McpEvent[] = [ + toolCall('search_code', 10_000), + toolCall('get_subgraph', 20_000), + ]; + const r = computeObstinacy(mcp, lease); + expect(r.total_stale_episodes).toBe(1); + expect(r.episodes[0].calls_before_orient).toBe(2); + }); + + it('handles multiple separate stale episodes', () => { + const lease: LeaseEvent[] = [ + staleEvent(1, 0), + orientResetEvent(30_000), + staleEvent(2, 60_000), + orientResetEvent(90_000), + ]; + const mcp: McpEvent[] = [ + toolCall('search_code', 10_000), + toolCall('search_code', 70_000), + toolCall('search_code', 80_000), + ]; + const r = computeObstinacy(mcp, lease); + expect(r.total_stale_episodes).toBe(2); + expect(r.episodes[0].calls_before_orient).toBe(1); + expect(r.episodes[1].calls_before_orient).toBe(2); + }); +}); diff --git a/src/cli/commands/telemetry.ts b/src/cli/commands/telemetry.ts index 0054883b..2cd57b10 100644 --- a/src/cli/commands/telemetry.ts +++ b/src/cli/commands/telemetry.ts @@ -50,6 +50,17 @@ interface LeaseEvent { from_state?: string; tool?: string; cognitive_load?: number; density?: number; oscillation?: number; age_min?: number; prior_load?: number; prior_depth?: number; } +interface PanicEvent { + ts: string; + event: 'panic_level_change' | 'panic_orient_reset' | 'hook_intervention' | 'panic_signal_injected'; + from_level?: number; to_level?: number; + panic_score?: number; severity?: string; + orient_kind?: 'normal' | 'rapid' | 'spam'; + delta?: number; from_score?: number; to_score?: number; + intervention_count?: number; + call_triggers?: string[]; + gryph_enriched?: boolean; +} // 
============================================================================ // METRIC COMPUTATIONS @@ -188,6 +199,82 @@ function computeRecovery(mcp: McpEvent[], lease: LeaseEvent[]) { }; } +// Exported for testing +export type { PanicEvent, LeaseEvent, McpEvent }; +export { computePanicStats, computeRecovery, computeObstinacy }; + +/** + * Panic stats: episode count, avg recovery latency, hook intercepts, orient spam. + */ +function computePanicStats(panic: PanicEvent[]) { + // Episodes: sequences from first level change up to return to level 0 + const levelChanges = panic.filter(e => e.event === 'panic_level_change'); + const hookIntercepts = panic.filter(e => e.event === 'hook_intervention').length; + const injections = panic.filter(e => e.event === 'panic_signal_injected').length; + + // Episode: starts when level goes from 0→N, ends when N→0 + const episodes: { start: string; end?: string; peak: number }[] = []; + let inEpisode = false; + let peakLevel = 0; + let startTs = ''; + for (const e of levelChanges.sort((a, b) => a.ts.localeCompare(b.ts))) { + const from = e.from_level ?? 0; + const to = e.to_level ?? 0; + if (!inEpisode && from === 0 && to > 0) { + inEpisode = true; peakLevel = to; startTs = e.ts; + } else if (inEpisode) { + if (to > peakLevel) peakLevel = to; + if (to === 0) { + episodes.push({ start: startTs, end: e.ts, peak: peakLevel }); + inEpisode = false; peakLevel = 0; + } + } + } + if (inEpisode) episodes.push({ start: startTs, peak: peakLevel }); + + // Avg recovery latency (ms): episode start to end + const completedEpisodes = episodes.filter(e => e.end); + const recoveryLatencies = completedEpisodes.map(e => + new Date(e.end!).getTime() - new Date(e.start).getTime() + ); + const avgRecoveryMs = recoveryLatencies.length + ? Math.round(recoveryLatencies.reduce((a, b) => a + b, 0) / recoveryLatencies.length) + : null; + + // Failed recovery rate: episodes that never returned to L0 + const failedRate = episodes.length + ? 
`${episodes.filter(e => !e.end).length}/${episodes.length}` + : '—'; + + // Orient spam events + const orientResets = panic.filter(e => e.event === 'panic_orient_reset'); + const spamOrients = orientResets.filter(e => e.orient_kind === 'spam').length; + const rapidOrients = orientResets.filter(e => e.orient_kind === 'rapid').length; + + // Gryph enrichments + const gryphEnriched = panic.filter(e => e.event === 'hook_intervention' && e.gryph_enriched).length; + + // Trigger frequency across all events + const triggerCounts = new Map(); + for (const e of panic) { + for (const t of e.call_triggers ?? []) { + triggerCounts.set(t, (triggerCounts.get(t) ?? 0) + 1); + } + } + + return { + panic_episodes: episodes.length, + avg_recovery_ms: avgRecoveryMs, + failed_recovery_rate: failedRate, + hook_intercepts: hookIntercepts, + mcp_injections: injections, + orient_spam_events: spamOrients, + orient_rapid_events: rapidOrients, + gryph_enriched_intercepts: gryphEnriched, + trigger_counts: [...triggerCounts.entries()].sort((a, b) => b[1] - a[1]), + }; +} + /** * Trajectory entropy: low entropy oscillation (auth→billing→auth→billing) vs * exploratory (auth→billing→infra→cache). Uses bigram repetition ratio. 
@@ -218,7 +305,7 @@ function hr() { console.log('─'.repeat(60)); } function section(title: string) { hr(); console.log(` ${title}`); hr(); } function renderSummary( - mcp: McpEvent[], orient: OrientEvent[], cache: CacheEvent[], lease: LeaseEvent[] + mcp: McpEvent[], orient: OrientEvent[], cache: CacheEvent[], lease: LeaseEvent[], panicEvents: PanicEvent[] ) { const tools = computeToolStats(mcp); const cacheStats = computeCacheStats(cache); @@ -226,6 +313,7 @@ function renderSummary( const obstinacy = computeObstinacy(mcp, lease); const recovery = computeRecovery(mcp, lease); const trajectory = computeTrajectoryEntropy(lease); + const panicStats = computePanicStats(panicEvents); section('TOOL LATENCY'); if (tools.stats.length) { @@ -284,6 +372,18 @@ function renderSummary( console.log(` max density : ${trajectory.max_density}`); console.log(` burst events (≥0.6) : ${trajectory.burst_events}`); + section('PANIC RESPONSE'); + console.log(` panic episodes : ${panicStats.panic_episodes}`); + console.log(` avg recovery latency : ${panicStats.avg_recovery_ms != null ? 
`${panicStats.avg_recovery_ms}ms` : '—'}`); + console.log(` failed recovery rate : ${panicStats.failed_recovery_rate}`); + console.log(` hook intercepts : ${panicStats.hook_intercepts}`); + console.log(` mcp injections : ${panicStats.mcp_injections}`); + console.log(` orient spam events : ${panicStats.orient_spam_events} (rapid: ${panicStats.orient_rapid_events})`); + console.log(` gryph-enriched : ${panicStats.gryph_enriched_intercepts}`); + if (panicStats.trigger_counts.length) { + console.log(` triggers : ${panicStats.trigger_counts.map(([k, v]) => `${k}×${v}`).join(' ')}`); + } + hr(); } @@ -384,18 +484,19 @@ Examples: return; // keep process alive — watcher keeps running } - const [mcp, orient, cache, lease] = await Promise.all([ + const [mcp, orient, cache, lease, panicEvents] = await Promise.all([ readJsonl(join(telDir, 'mcp.jsonl')), readJsonl(join(telDir, 'orient.jsonl')), readJsonl(join(telDir, 'cache.jsonl')), readJsonl(join(telDir, 'epistemic-lease.jsonl')), + readJsonl(join(telDir, 'panic.jsonl')), ]); - if (!mcp.length && !orient.length && !cache.length && !lease.length) { + if (!mcp.length && !orient.length && !cache.length && !lease.length && !panicEvents.length) { console.log(`No telemetry found at ${telDir}`); console.log('Enable with: export OPENLORE_TELEMETRY=1'); return; } - renderSummary(mcp, orient, cache, lease); + renderSummary(mcp, orient, cache, lease, panicEvents); }); diff --git a/src/cli/index.ts b/src/cli/index.ts index 10298875..0357f25c 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -25,6 +25,9 @@ import { testCommand } from './commands/test.js'; import { digestCommand } from './commands/digest.js'; import { decisionsCommand } from './commands/decisions.js'; import { telemetryCommand } from './commands/telemetry.js'; +import { panicCheckCommand } from './commands/panic-check.js'; +import { panicLevelCommand } from './commands/panic-level.js'; +import { gryphWatchCommand } from './commands/gryph-watch.js'; import { 
configureLogger } from '../utils/logger.js'; // Read version from package.json at runtime so it never drifts from the published version @@ -135,5 +138,8 @@ program.addCommand(testCommand); program.addCommand(digestCommand); program.addCommand(decisionsCommand); program.addCommand(telemetryCommand); +program.addCommand(panicCheckCommand); +program.addCommand(panicLevelCommand); +program.addCommand(gryphWatchCommand); program.parse(); diff --git a/src/core/analyzer/vector-index.ts b/src/core/analyzer/vector-index.ts index 77cde7cc..40d5337e 100644 --- a/src/core/analyzer/vector-index.ts +++ b/src/core/analyzer/vector-index.ts @@ -140,9 +140,14 @@ function rrfScore(rankDense: number, rankSparse: number, k = 60): number { return 1 / (k + rankDense + 1) + 1 / (k + rankSparse + 1); } -// Module-level BM25 corpus cache: avoids a full table scan on every search call -// when the index hasn't changed. Keyed by dbPath; invalidated when row count changes. -const _bm25Cache = new Map(); +// Module-level BM25 corpus cache: avoids a full table scan on every search call. +// Keyed by dbPath; invalidated by build() when the index is rebuilt. +const _bm25Cache = new Map[] }>(); + +// Module-level LanceDB table cache: avoids connect() + openTable() on every search call. +// Invalidated by build() when the index is rebuilt. 
+// eslint-disable-next-line @typescript-eslint/no-explicit-any +const _tableCache = new Map(); // ============================================================================ // HELPERS @@ -372,6 +377,10 @@ export class VectorIndex { const db = await connect(dbPath); await db.createTable(TABLE_NAME, fullRecords as unknown as Record[], { mode: 'overwrite' }); + // Invalidate search caches — index was just rebuilt + _tableCache.delete(dbPath); + _bm25Cache.delete(dbPath); + return { embedded: toEmbed.length, reused: cachedIdx.length }; } @@ -397,8 +406,6 @@ export class VectorIndex { hybrid?: boolean; } = {} ): Promise { - const { connect } = await import('@lancedb/lancedb'); - const { limit = 10, language, minFanIn, hybrid = true } = opts; if (!VectorIndex.exists(outputDir)) { @@ -406,8 +413,16 @@ export class VectorIndex { } const dbPath = join(outputDir, DB_FOLDER); - const db = await connect(dbPath); - const table = await db.openTable(TABLE_NAME); + let tableEntry = _tableCache.get(dbPath); + if (!tableEntry) { + const { connect } = await import('@lancedb/lancedb'); + const db = await connect(dbPath); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const table: any = await db.openTable(TABLE_NAME); + tableEntry = { table }; + _tableCache.set(dbPath, tableEntry); + } + const table = tableEntry.table; // ── BM25-only path (no embedding service available) ─────────────────────── if (!embedSvc) { @@ -425,7 +440,7 @@ export class VectorIndex { if (!queryVector) throw new Error('Failed to embed query'); const denseFetch = hybrid ? 
Math.min(limit * 5, 500) : Math.min(limit * 10, 1000); - const denseRows = await table.query().nearestTo(queryVector).limit(denseFetch).toArray(); + const denseRows = await table.query().nearestTo(queryVector).limit(denseFetch).toArray() as Record[]; const passesFilters = (row: Record): boolean => { if (language && (row.language as string) !== language) return false; @@ -446,22 +461,15 @@ export class VectorIndex { let allRows: Record[]; if (!cachedEntry) { - allRows = await table.query().toArray(); + allRows = await table.query().toArray() as Record[]; const corpus = buildBm25Corpus( allRows.map(r => ({ id: r.id as string, text: r.text as string })) ); - cachedEntry = { corpus, rowCount: allRows.length }; + cachedEntry = { corpus, rowCount: allRows.length, rows: allRows }; _bm25Cache.set(dbPath, cachedEntry); } else { - // Lightweight cache validation: re-scan only if row count has changed - allRows = await table.query().toArray(); - if (allRows.length !== cachedEntry.rowCount) { - const corpus = buildBm25Corpus( - allRows.map(r => ({ id: r.id as string, text: r.text as string })) - ); - cachedEntry = { corpus, rowCount: allRows.length }; - _bm25Cache.set(dbPath, cachedEntry); - } + // Use cached rows — invalidated by build() when index is rebuilt + allRows = cachedEntry.rows; } const { corpus } = cachedEntry; @@ -531,21 +539,15 @@ export class VectorIndex { let allRows: Record[]; if (!cachedEntry) { - allRows = await table.query().toArray(); + allRows = await table.query().toArray() as Record[]; const corpus = buildBm25Corpus( allRows.map(r => ({ id: r.id as string, text: r.text as string })) ); - cachedEntry = { corpus, rowCount: allRows.length }; + cachedEntry = { corpus, rowCount: allRows.length, rows: allRows }; _bm25Cache.set(dbPath, cachedEntry); } else { - allRows = await table.query().toArray(); - if (allRows.length !== cachedEntry.rowCount) { - const corpus = buildBm25Corpus( - allRows.map(r => ({ id: r.id as string, text: r.text as string })) - ); - 
cachedEntry = { corpus, rowCount: allRows.length }; - _bm25Cache.set(dbPath, cachedEntry); - } + // Use cached rows — invalidated by build() when index is rebuilt + allRows = cachedEntry.rows; } const { corpus } = cachedEntry; diff --git a/src/core/services/config-manager.ts b/src/core/services/config-manager.ts index a93c1973..b3d6998f 100644 --- a/src/core/services/config-manager.ts +++ b/src/core/services/config-manager.ts @@ -65,6 +65,7 @@ export function getDefaultConfig(projectType: ProjectType, openspecPath: string) model: DEFAULT_ANTHROPIC_MODEL, domains: 'auto', }, + panicResponse: { mode: 'off' }, createdAt: new Date().toISOString(), lastRun: null, }; diff --git a/src/core/services/mcp-handlers/epistemic-lease.test.ts b/src/core/services/mcp-handlers/epistemic-lease.test.ts index 7d9ce4f4..a9b7919f 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.test.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.test.ts @@ -3,8 +3,20 @@ */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { createTracker, updateTracker, injectFreshness, getSourceRoots } from './epistemic-lease.js'; +import { createTracker, updateTracker, updatePanic, injectFreshness, getSourceRoots, trackerToPanicState } from './epistemic-lease.js'; import type { EpistemicTracker } from './epistemic-lease.js'; +import { + PANIC_TRAJECTORY_DENSITY, + PANIC_TRAJECTORY_DELTA, + PANIC_OSCILLATION_THRESHOLD, + PANIC_OSCILLATION_DELTA, + PANIC_STALE_D3_LOCALITY_GATE, + PANIC_STALE_D3_DELTA, + PANIC_REFRACTORY_MS, + PANIC_UP_THRESHOLD, + PANIC_DOWN_THRESHOLD, + HOOK_COOLDOWN_MS, +} from './panic-constants.js'; // ============================================================================ // Mock git hash — default returns stable hash @@ -621,3 +633,378 @@ describe('updateTracker — V3.1 cross-module trajectory', () => { expect(t.moduleAccessWindow).toHaveLength(15); }); }); + +// ============================================================================ +// 
Panic helpers — policy is now external; tests must call updatePanic() explicitly +// after updateTracker() when they want to observe panic scoring behavior. +// ============================================================================ + +function callBoth(t: EpistemicTracker, tool: string, dir: string, filePath?: string): void { + updateTracker(t, tool, dir, filePath); + // orient is handled by resetTracker() internally; do not double-apply panic scoring. + if (tool !== 'orient') { + updatePanic(t, { density: t.density, oscillation: t.oscillation, weight: 1, staleDepth: t.staleDepth, directory: dir, tool }); + } +} + +// ============================================================================ +// Panic — score accumulation and level transitions +// ============================================================================ + +describe('panic — score and level via updateTracker', () => { + it('starts at panicScore 0, panicLevel 0', () => { + const t = freshTracker(); + expect(t.panicScore).toBe(0); + expect(t.panicLevel).toBe(0); + }); + + it('panicScore increases with oscillation', () => { + const t = freshTracker(); + // Build A→B→A→B oscillation (bigram repetition) driving oscillation score up + for (let i = 0; i < 15; i++) { + const mod = i % 2 === 0 ? 
'auth' : 'billing'; + callBoth(t, 'search_code', '/fake/repo', `src/${mod}/x.ts`); + } + expect(t.panicScore).toBeGreaterThan(0); + }); + + it('panicLevel rises to 1 when panicScore >= 30', () => { + const t = freshTracker(); + t.panicScore = 29; + // One more call with high density should push it over 30 + t.moduleAccessWindow = ['auth','billing','auth','billing','auth','billing','auth','billing', + 'auth','billing','auth','billing','auth','billing','auth'] as (string|null)[]; + t.lastModule = 'auth'; + callBoth(t, 'trace_execution_path', '/fake/repo', 'src/billing/x.ts'); + expect(t.panicLevel).toBeGreaterThanOrEqual(1); + }); + + it('staleDepth floors panicLevel via panic ceiling (staleDepth=3 → min L2)', () => { + const t = freshTracker(); + t.panicScore = 0; + // Force stale at depth 3 + t.freshnessState = 'stale'; + t.staleDepth = 3; + callBoth(t, 'list_spec_domains', '/fake/repo'); + // Panic ceiling: staleDepth≥3 → panicLevel ≥ 2 + expect(t.panicLevel).toBeGreaterThanOrEqual(2); + }); + + it('panicLevel resets interventionCountSinceStable when dropping to 0', () => { + const t = freshTracker(); + t.panicLevel = 1; + t.panicScore = 5; // below down-threshold for L1 (20) → drops to L0 + t.interventionCountSinceStable = 5; + callBoth(t, 'list_spec_domains', '/fake/repo'); + expect(t.panicLevel).toBe(0); + expect(t.interventionCountSinceStable).toBe(0); + }); + + it('localityConfidence near 1 at low density', () => { + const t = freshTracker(); + updateTracker(t, 'search_code', '/fake/repo'); + expect(t.localityConfidence).toBeGreaterThan(0.9); + }); +}); + +// ============================================================================ +// Panic — orient spam protection +// ============================================================================ + +describe('panic — orient spam protection', () => { + beforeEach(() => { vi.useFakeTimers(); }); + afterEach(() => { vi.useRealTimers(); }); + + it('normal orient (>2min gap) applies -40 recovery', () => { + 
const t = freshTracker(); + t.panicScore = 50; + vi.advanceTimersByTime(3 * 60 * 1000); // 3min gap + updateTracker(t, 'orient', '/fake/repo'); + expect(t.panicScore).toBe(10); // 50 - 40 + }); + + it('rapid orient (<2min gap) applies only -15', () => { + const t = freshTracker(); + t.panicScore = 50; + // Simulate a prior orient 30s ago so the next orient is "rapid" + t.lastOrientResetAt = Date.now() - 30_000; + t.recentOrientCount = 1; + updateTracker(t, 'orient', '/fake/repo'); + expect(t.panicScore).toBe(35); // 50 - 15 + }); + + it('3rd+ rapid orient applies 0 recovery (spam)', () => { + const t = freshTracker(); + t.panicScore = 50; + // Simulate 2 prior rapid orients (count already 2) + t.lastOrientResetAt = Date.now() - 30_000; + t.recentOrientCount = 2; + updateTracker(t, 'orient', '/fake/repo'); // count=3 → spam, delta=0 + expect(t.panicScore).toBe(50); // no change + }); + + it('non-rapid orient resets spam counter', () => { + const t = freshTracker(); + t.panicScore = 50; + // Simulate: spam state (2 rapid orients), last orient was 30s ago + t.lastOrientResetAt = Date.now() - 30_000; + t.recentOrientCount = 2; + // Now advance 3min — next orient will be non-rapid + vi.advanceTimersByTime(3 * 60 * 1000); + updateTracker(t, 'orient', '/fake/repo'); // counter reset to 0, +1 = 1, non-rapid → -40 + expect(t.panicScore).toBe(10); // 50 - 40 + expect(t.recentOrientCount).toBe(1); + }); + + it('panicScore never goes below 0', () => { + const t = freshTracker(); + t.panicScore = 10; + vi.advanceTimersByTime(3 * 60 * 1000); + updateTracker(t, 'orient', '/fake/repo'); // -40 would give -30, clamped to 0 + expect(t.panicScore).toBe(0); + }); +}); + +// ============================================================================ +// Panic — signal detection (trajectory_burst, oscillation_spike, stale_depth_3) +// ============================================================================ + +describe('panic — individual signal detection', () => { + beforeEach(() 
=> { vi.useFakeTimers(); }); + afterEach(() => { vi.useRealTimers(); }); + + it('trajectory_burst (+15) fires when density >= 0.60', () => { + const t = freshTracker(); + t.panicScore = 0; + // Fill window with dense cross-module switching (10 distinct modules in 15 slots) + t.moduleAccessWindow = [ + 'a','b','c','d','e','f','g','h','i','j','a','b','c','d','e', + ] as (string|null)[]; + t.lastModule = 'e'; + // density = 14 switches / 15 = 0.93 → trajectory_burst fires + callBoth(t, 'search_code', '/fake/repo', 'src/f/x.ts'); + // +15 trajectory_burst (oscillation may also add +10 if ≥0.50) + expect(t.panicScore).toBeGreaterThanOrEqual(15); + }); + + it('oscillation_spike (+10) fires when oscillation >= 0.50', () => { + const t = freshTracker(); + t.panicScore = 0; + // Pure A→B bigram repetition → oscillation = 1.0 + const window: (string|null)[] = []; + for (let i = 0; i < 14; i++) window.push(i % 2 === 0 ? 'auth' : 'billing'); + t.moduleAccessWindow = window; + t.lastModule = 'billing'; + // This call adds 'auth', creating another A→B→A bigram → oscillation stays high + callBoth(t, 'search_code', '/fake/repo', 'src/auth/x.ts'); + // oscillation_spike (+10) + trajectory_burst (+15) both fire + expect(t.panicScore).toBeGreaterThanOrEqual(10); + }); + + it('stale_depth_3 (+25) fires when staleDepth=3 AND localityConfidence < 0.5', () => { + const t = freshTracker(); + t.panicScore = 0; + t.freshnessState = 'stale'; + t.staleDepth = 3; + // Build low localityConfidence via high density + oscillation in window + t.moduleAccessWindow = ['a','b','a','b','a','b','a','b','a','b','a','b','a','b','a'] as (string|null)[]; + t.localityConfidence = 0.1; // already low from previous calls — gate should open + callBoth(t, 'search_code', '/fake/repo', 'src/c/x.ts'); + // trajectory_burst + oscillation_spike + stale_depth_3 all fire + expect(t.panicScore).toBeGreaterThanOrEqual(25); + }); + + it('stale_depth_3 does NOT fire when localityConfidence >= 0.5 (focused stale work)', () 
=> { + const t = freshTracker(); + t.panicScore = 0; + t.freshnessState = 'stale'; + t.staleDepth = 3; + // Empty window → density=0, oscillation=0 → localityConfidence=1.0 + t.moduleAccessWindow = []; + t.localityConfidence = 1.0; + callBoth(t, 'search_code', '/fake/repo'); // no filePath → stays in same module + // stale_depth_3 gate blocked; only decay/locality_recovery may apply + // score should not increase (no upward signals fire at high localityConfidence) + expect(t.panicScore).toBe(0); + }); + + it('locality_recovery (-3) fires when density < 0.10, oscillation < 0.10, staleDepth = 0', () => { + const t = freshTracker(); + t.panicScore = 20; + t.moduleAccessWindow = []; // empty → density=0, oscillation=0 + t.localityConfidence = 1.0; + t.staleDepth = 0; + callBoth(t, 'search_code', '/fake/repo'); // no cross-module activity + // locality_recovery (-3) fires; panicScore should drop + expect(t.panicScore).toBeLessThan(20); + }); +}); + +// ============================================================================ +// Panic — refractory period +// ============================================================================ + +describe('panic — refractory period after orient()', () => { + beforeEach(() => { vi.useFakeTimers(); }); + afterEach(() => { vi.useRealTimers(); }); + + it('orient() sets panicRecoverySuppressionUntil when score reduces', () => { + const t = freshTracker(); + t.panicScore = 50; + vi.advanceTimersByTime(3 * 60 * 1000); + updateTracker(t, 'orient', '/fake/repo'); // -40 → score=10, refractory set + expect(t.panicRecoverySuppressionUntil).toBeGreaterThan(Date.now()); + }); + + it('spam orient (delta=0) does NOT set refractory', () => { + const t = freshTracker(); + t.panicScore = 50; + t.lastOrientResetAt = Date.now() - 30_000; + t.recentOrientCount = 2; // 3rd rapid → spam → delta=0 + updateTracker(t, 'orient', '/fake/repo'); + expect(t.panicRecoverySuppressionUntil).toBe(0); // not set + }); + + it('upward signals suppressed during 
refractory window', () => { + const t = freshTracker(); + t.panicScore = 50; + vi.advanceTimersByTime(3 * 60 * 1000); + updateTracker(t, 'orient', '/fake/repo'); // sets refractory + const scoreAfterOrient = t.panicScore; + + // Now trigger high density + oscillation conditions + t.moduleAccessWindow = ['a','b','a','b','a','b','a','b','a','b','a','b','a','b','a'] as (string|null)[]; + t.lastModule = 'a'; + t.freshnessState = 'stale'; + t.staleDepth = 3; + callBoth(t, 'trace_execution_path', '/fake/repo', 'src/b/x.ts'); + + // Upward signals blocked by refractory — score should not increase above post-orient value + // (may decrease from decay/locality_recovery, but not increase) + expect(t.panicScore).toBeLessThanOrEqual(scoreAfterOrient); + }); + + it('upward signals resume after refractory window expires', () => { + const t = freshTracker(); + t.panicScore = 50; + vi.advanceTimersByTime(3 * 60 * 1000); + updateTracker(t, 'orient', '/fake/repo'); // sets refractory + const scoreAfterOrient = t.panicScore; + + // Advance past the 45s refractory window + vi.advanceTimersByTime(50_000); + t.panicRecoverySuppressionUntil = Date.now() - 1; // force expiry + + // Now trigger burst conditions + t.moduleAccessWindow = ['a','b','a','b','a','b','a','b','a','b','a','b','a','b','a'] as (string|null)[]; + t.lastModule = 'a'; + t.localityConfidence = 0.0; + callBoth(t, 'trace_execution_path', '/fake/repo', 'src/b/x.ts'); + + // Signals should now fire → score increases + expect(t.panicScore).toBeGreaterThan(scoreAfterOrient); + }); +}); + +// ============================================================================ +// Panic — localityConfidence formula +// ============================================================================ + +describe('panic — localityConfidence formula', () => { + it('high oscillation alone degrades localityConfidence even at low density', () => { + const t = freshTracker(); + // Fill with same-module oscillation: stays in 'auth', no cross-module 
switches + // but builds up bigram repetition + t.moduleAccessWindow = ['auth','auth','auth','auth','auth','auth','auth','auth', + 'auth','auth','auth','auth','auth','auth','auth'] as (string|null)[]; + // density = 0 (no switches), oscillation = 0 (same module, no bigram repetition) + updateTracker(t, 'search_code', '/fake/repo', 'src/auth/x.ts'); + // low density, low oscillation (all same module) → localityConfidence near 1 + expect(t.localityConfidence).toBeGreaterThan(0.9); + }); + + it('high density alone degrades localityConfidence', () => { + const t = freshTracker(); + // Dense cross-module switching, no oscillation (linear A→B→C→D) + t.moduleAccessWindow = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o'] as (string|null)[]; + t.lastModule = 'o'; + updateTracker(t, 'search_code', '/fake/repo', 'src/p/x.ts'); + // High density → localityConfidence degrades toward 0 + expect(t.localityConfidence).toBeLessThan(0.2); + }); + + it('both density and oscillation combine multiplicatively', () => { + const t = freshTracker(); + // A→B oscillation (high both density and oscillation) + t.moduleAccessWindow = ['a','b','a','b','a','b','a','b','a','b','a','b','a','b','a'] as (string|null)[]; + t.lastModule = 'a'; + updateTracker(t, 'search_code', '/fake/repo', 'src/b/x.ts'); + // Both density and oscillation high → confidence very low (multiplicative kill) + expect(t.localityConfidence).toBeLessThan(0.1); + }); +}); + +// ============================================================================ +// Panic — burst escalation gated by localityConfidence +// ============================================================================ + +describe('panic — burst escalation gate', () => { + it('burst (heavy tool on stale) does NOT escalate to depth 3 at high localityConfidence', () => { + const t = freshTracker(); + t.freshnessState = 'stale'; + t.staleDepth = 1; + t.localityConfidence = 0.9; // high confidence — focused work + t.moduleAccessWindow = []; // 
empty → density=0 + // trace_execution_path has weight 8 → burst condition met (weight >= BURST_TOOL_WEIGHT_THRESHOLD) + updateTracker(t, 'trace_execution_path', '/fake/repo'); + // Burst escalation blocked by high localityConfidence + expect(t.staleDepth).toBeLessThan(3); + }); + + it('burst escalates to depth 3 when localityConfidence < 0.5', () => { + const t = freshTracker(); + t.freshnessState = 'stale'; + t.staleDepth = 1; + // A→B→A→B oscillation → density + oscillation both high → localityConfidence computed < 0.5 + t.moduleAccessWindow = ['a','b','a','b','a','b','a','b','a','b','a','b','a','b','a'] as (string|null)[]; + t.lastModule = 'a'; + // trace_execution_path (weight=8) → burst condition met; localityConfidence computed from window + updateTracker(t, 'trace_execution_path', '/fake/repo', 'src/b/x.ts'); + expect(t.staleDepth).toBe(3); + }); +}); + +// ============================================================================ +// trackerToPanicState +// ============================================================================ + +describe('trackerToPanicState', () => { + it('maps tracker fields to PanicState correctly', () => { + const t = freshTracker(); + t.panicScore = 42; + t.panicLevel = 1; + t.localityConfidence = 0.8; + t.recentOrientCount = 2; + t.interventionCountSinceStable = 1; + + const state = trackerToPanicState(t, 'claude-code', 'sess-123'); + + expect(state.schemaVersion).toBe(1); + expect(state.panicScore).toBe(42); + expect(state.panicLevel).toBe(1); + expect(state.localityConfidence).toBe(0.8); + expect(state.recentOrientCount).toBe(2); + expect(state.interventionCountSinceStable).toBe(1); + expect(state.agentId).toBe('claude-code'); + expect(state.sessionId).toBe('sess-123'); + expect(state.updatedAt).toBeTruthy(); + expect(state.lastOrientAt).toBeTruthy(); + }); + + it('agentId and sessionId are optional', () => { + const t = freshTracker(); + const state = trackerToPanicState(t); + expect(state.agentId).toBeUndefined(); + 
expect(state.sessionId).toBeUndefined(); + }); +}); diff --git a/src/core/services/mcp-handlers/epistemic-lease.ts b/src/core/services/mcp-handlers/epistemic-lease.ts index 817f8836..5635a334 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.ts @@ -34,6 +34,20 @@ import { ARTIFACT_CALL_GRAPH_DB, } from '../../../constants.js'; import { emit } from '../telemetry.js'; +import { applyPanicHysteresis } from './panic-response.js'; +import type { PanicLevel, PanicState } from './panic-response.js'; +import { + PANIC_SCORE_MAX, + PANIC_TRAJECTORY_DENSITY, + PANIC_TRAJECTORY_DELTA, + PANIC_OSCILLATION_THRESHOLD, + PANIC_OSCILLATION_DELTA, + PANIC_STALE_D3_LOCALITY_GATE, + PANIC_STALE_D3_DELTA, + PANIC_LOCALITY_RECOVERY, + PANIC_DECAY_PER_MIN, + PANIC_REFRACTORY_MS, +} from './panic-constants.js'; // ============================================================================ // TYPES @@ -63,6 +77,28 @@ export interface EpistemicTracker { lastSwitchAt: number; /** V3.2: oscillation score — repeated bigram transitions / total transitions [0,1]. */ oscillation: number; + /** V3.2: last computed cross-module density [0,1] — stored so callers can read after updateTracker(). */ + density: number; + // Panic fields — behavioral destabilization tracking (separate from freshness) + panicScore: number; + panicLevel: PanicLevel; + /** + * Shared behavioral coherence metric [0,1]. + * Used by: freshness burst gating AND panic escalation gating (stale_depth_3, burst). + * WARNING: changes affect both systems. Modify with full blast-radius awareness. + */ + localityConfidence: number; + recentOrientCount: number; + lastOrientResetAt: number; + interventionCountSinceStable: number; + /** Epoch ms of last panic score update — for passive decay calculation. */ + lastPanicUpdateAt: number; + /** Accumulated signal trigger labels for the current panic episode. 
*/ + panicTriggers: string[]; + /** Epoch ms — upward panic signals suppressed until this time after orient() recovery. */ + panicRecoverySuppressionUntil: number; + /** Revision of the last panic-state.json write (from MCP or Gryph sync). Used for CAS monotonicity. */ + panicRevision: number; } // ============================================================================ @@ -155,6 +191,127 @@ const SWITCH_DAMPENING_MS = 5_000; const BURST_DENSITY_THRESHOLD = 0.60; // density for post-stale burst escalation const BURST_TOOL_WEIGHT_THRESHOLD = 8; // tool weight for post-stale burst escalation +// Panic constants +const RAPID_ORIENT_INTERVAL_MS = 2 * 60 * 1000; // orients within 2min are "rapid" +// Panic signal thresholds and weights imported from panic-constants.ts + +// ============================================================================ +// PANIC UPDATE +// Called on every tool call with current density/oscillation signals. +// Score delta: positive from instability signals, negative from orient resets. +// ============================================================================ + +interface PanicProvenanceItem { + name: string; + delta: number; + evidence: Record; +} + +export function updatePanic( + tracker: EpistemicTracker, + opts: { density: number; oscillation: number; weight: number; staleDepth: number; directory?: string; tool?: string }, +): void { + const { density, oscillation, staleDepth, directory = '', tool = '' } = opts; + const now = Date.now(); + const inRefractory = tracker.panicRecoverySuppressionUntil > now; + + // Passive wall-clock decay: -5 per minute elapsed since last update + const elapsedMin = tracker.lastPanicUpdateAt > 0 + ? 
Math.max(0, (now - tracker.lastPanicUpdateAt) / 60_000) + : 0; + const decayDelta = -Math.floor(elapsedMin * PANIC_DECAY_PER_MIN); + + let delta = decayDelta; + const provenance: PanicProvenanceItem[] = []; + if (decayDelta < 0) { + provenance.push({ name: 'passive_decay', delta: decayDelta, evidence: { elapsed_min: Math.round(elapsedMin * 100) / 100 } }); + } + + // localityConfidence is computed in updateTracker() and stored in tracker. + // Read it here so signal gating uses the current value. + const localityConfidence = tracker.localityConfidence; + + // Upward signals — suppressed during refractory period after orient() recovery + if (!inRefractory) { + if (density >= PANIC_TRAJECTORY_DENSITY) { + const d = PANIC_TRAJECTORY_DELTA; + delta += d; + provenance.push({ name: 'trajectory_burst', delta: d, evidence: { density } }); + } + if (oscillation >= PANIC_OSCILLATION_THRESHOLD) { + const d = PANIC_OSCILLATION_DELTA; + delta += d; + provenance.push({ name: 'oscillation_spike', delta: d, evidence: { oscillation } }); + } + // stale_depth_3 signal gated by localityConfidence: a stale agent doing focused local + // work (high confidence) is much less risky than a stale agent in behavioral drift. 
+ if (staleDepth >= 3 && localityConfidence < PANIC_STALE_D3_LOCALITY_GATE) { + const d = PANIC_STALE_D3_DELTA; + delta += d; + provenance.push({ name: 'stale_depth_3', delta: d, evidence: { stale_depth: staleDepth, locality_confidence: localityConfidence } }); + } + } + + // Locality recovery — always applies, not gated by refractory + if (density < 0.10 && oscillation < 0.10 && staleDepth === 0) { + const d = -PANIC_LOCALITY_RECOVERY; + delta += d; + provenance.push({ name: 'locality_recovery', delta: d, evidence: { density, oscillation } }); + } + + const scoreBefore = tracker.panicScore; + tracker.lastPanicUpdateAt = now; + tracker.panicScore = Math.min(PANIC_SCORE_MAX, Math.max(0, tracker.panicScore + delta)); + + // Accumulate trigger names for the current episode (upward signals only) + const upwardTriggers = provenance.filter(p => p.delta > 0).map(p => p.name); + for (const t of upwardTriggers) { + if (!tracker.panicTriggers.includes(t)) tracker.panicTriggers.push(t); + } + + const prevLevel = tracker.panicLevel; + tracker.panicLevel = applyPanicHysteresis(tracker.panicLevel, tracker.panicScore, staleDepth); + + // Emit provenance trace whenever score changes with active signals + if (tracker.panicScore !== scoreBefore && provenance.length > 0) { + emit(directory, 'panic', { + event: 'panic_score_delta', + tool, + score_before: scoreBefore, + score_after: tracker.panicScore, + delta, + in_refractory: inRefractory, + stale_depth: staleDepth, + density, + oscillation, + triggers: provenance, + }); + } + + if (tracker.panicLevel !== prevLevel) { + const levelTrigger = staleDepth >= 2 && tracker.panicLevel > prevLevel ? 
'ceiling' : 'score'; + emit(directory, 'panic', { + event: 'panic_level_change', + tool, + from_level: prevLevel, + to_level: tracker.panicLevel, + score_before: scoreBefore, + panic_score: tracker.panicScore, + density, + oscillation, + stale_depth: staleDepth, + in_refractory: inRefractory, + trigger: levelTrigger, + provenance, + }); + } + + if (tracker.panicLevel === 0 && prevLevel > 0) { + tracker.interventionCountSinceStable = 0; + tracker.panicTriggers = []; + } +} + // ============================================================================ // GIT HASH // ============================================================================ @@ -245,13 +402,19 @@ function computeCrossModuleDensity(window: (string | null)[]): number { function computeOscillationScore(window: (string | null)[]): number { const modules = window.filter((m): m is string => m !== null); if (modules.length < 3) return 0; + // Compute over transition sequence (entries where module actually changed). + // A→A→A→A → 0 transitions → oscillation = 0 (focused local work, not confusion). + // A→B→A→B → transitions [A,B,A,B] → oscillation = 1.0 (pure confusion loop). + const transitions: string[] = [modules[0]!]; + for (let i = 1; i < modules.length; i++) { + if (modules[i] !== modules[i - 1]) transitions.push(modules[i]!); + } + if (transitions.length < 3) return 0; let repeated = 0; - let total = 0; - for (let i = 2; i < modules.length; i++) { - total++; - if (modules[i] === modules[i - 2]) repeated++; + for (let i = 2; i < transitions.length; i++) { + if (transitions[i] === transitions[i - 2]) repeated++; } - return total > 0 ? 
repeated / total : 0; + return repeated / (transitions.length - 2); } // ============================================================================ @@ -284,17 +447,77 @@ export function createTracker(directory: string): EpistemicTracker { lastDensityPenaltyAt: 0, lastSwitchAt: 0, oscillation: 0, + density: 0, + panicScore: 0, + panicLevel: 0, + localityConfidence: 1, + recentOrientCount: 0, + lastOrientResetAt: 0, + interventionCountSinceStable: 0, + lastPanicUpdateAt: 0, + panicTriggers: [], + panicRecoverySuppressionUntil: 0, + panicRevision: 0, }; } function resetTracker(tracker: EpistemicTracker, directory: string): void { + const now = Date.now(); + + // Panic: orient spam protection — diminishing recovery bonus on rapid reuse + const timeSinceLastOrient = now - tracker.lastOrientResetAt; + if (timeSinceLastOrient >= RAPID_ORIENT_INTERVAL_MS) { + tracker.recentOrientCount = 0; // non-rapid: reset spam counter + } + tracker.recentOrientCount++; + tracker.lastOrientResetAt = now; + + let panicDelta: number; + let orientKind: 'normal' | 'rapid' | 'spam'; + if (tracker.recentOrientCount >= 3) { + panicDelta = 0; orientKind = 'spam'; + } else if (timeSinceLastOrient < RAPID_ORIENT_INTERVAL_MS) { + panicDelta = -15; orientKind = 'rapid'; + } else { + panicDelta = -40; orientKind = 'normal'; + } + + const prevScore = tracker.panicScore; + const prevLevel = tracker.panicLevel; + tracker.panicScore = Math.min(PANIC_SCORE_MAX, Math.max(0, tracker.panicScore + panicDelta)); + tracker.localityConfidence = 1; + tracker.panicLevel = applyPanicHysteresis(tracker.panicLevel, tracker.panicScore, 0); + if (tracker.panicLevel === 0) { + tracker.interventionCountSinceStable = 0; + tracker.panicTriggers = []; + } + // Set refractory window when orient() achieves actual score reduction. + // Suppresses upward signals for 45s to let recovery land before re-escalating. 
+ // Subsequent orient() calls during an active refractory replace the deadline + // (not extend): the window always starts fresh from the most recent recovery. + if (panicDelta < 0) { + tracker.panicRecoverySuppressionUntil = now + PANIC_REFRACTORY_MS; + } + + emit(directory, 'panic', { + event: 'panic_orient_reset', + orient_kind: orientKind, + delta: panicDelta, + from_score: prevScore, + to_score: tracker.panicScore, + from_level: prevLevel, + to_level: tracker.panicLevel, + recent_orient_count: tracker.recentOrientCount, + time_since_last_ms: tracker.lastOrientResetAt === now ? timeSinceLastOrient : 0, + }); + tracker.lastOrientAt = new Date(); tracker.graphVersionAtOrient = getGitHash(directory); tracker.cognitiveLoad = 0; tracker.modulesVisited = new Set(); tracker.freshnessState = 'fresh'; tracker.staleDepth = 0; - tracker.lastGitCheckAt = Date.now(); + tracker.lastGitCheckAt = now; tracker.lastModule = null; tracker.moduleAccessWindow = []; tracker.lastDensityPenaltyAt = 0; @@ -344,16 +567,25 @@ export function updateTracker( const density = computeCrossModuleDensity(tracker.moduleAccessWindow); const oscillation = computeOscillationScore(tracker.moduleAccessWindow); tracker.oscillation = oscillation; + tracker.density = density; + // localityConfidence is shared behavioral state: used by freshness (burst gate) + // and panic (stale_depth_3 gate, burst escalation gate). Computed here so it's + // always current regardless of whether panic scoring is enabled. + tracker.localityConfidence = Math.max(0, (1 - Math.min(1, density * 2)) * (1 - Math.min(1, oscillation))); // Already stale — time-based depth escalation only, plus V3.2 burst sensitivity. // Load stops accumulating here; burst detection uses tool weight and density instead. 
if (tracker.freshnessState === 'stale') { - // Post-stale burst: heavy architectural tool or trajectory burst → immediate depth 3 - if (tracker.staleDepth < 3 && (weight >= BURST_TOOL_WEIGHT_THRESHOLD || density >= BURST_DENSITY_THRESHOLD)) { + // Post-stale burst: heavy architectural tool or trajectory burst → immediate depth 3. + // Gated by localityConfidence: a stale agent doing focused local work is not bursting. + // High confidence (≥0.5) suppresses burst escalation — only clear behavioral drift triggers it. + const isBurst = weight >= BURST_TOOL_WEIGHT_THRESHOLD || density >= BURST_DENSITY_THRESHOLD; + if (tracker.staleDepth < 3 && isBurst && tracker.localityConfidence < PANIC_STALE_D3_LOCALITY_GATE) { emit(directory, 'epistemic-lease', { event: 'depth_escalate', from_depth: tracker.staleDepth, to_depth: 3, tool: toolName, module: mod, cognitive_load: tracker.cognitiveLoad, density, oscillation, age_min: Math.floor(ageMs / 60_000), trigger: 'burst', + locality_confidence: tracker.localityConfidence, }); tracker.staleDepth = 3; return; @@ -521,3 +753,23 @@ export function injectFreshness(text: string, tracker: EpistemicTracker): string if (!signal) return text; return signal.prepend ? signal.text + text : text + signal.text; } + +export function trackerToPanicState(tracker: EpistemicTracker, agentId?: string, sessionId?: string): PanicState { + return { + schemaVersion: 1, + panicScore: tracker.panicScore, + panicLevel: tracker.panicLevel, + updatedAt: new Date().toISOString(), + lastOrientAt: tracker.lastOrientAt.toISOString(), + recentOrientCount: tracker.recentOrientCount, + localityConfidence: tracker.localityConfidence, + interventionCountSinceStable: tracker.interventionCountSinceStable, + triggers: [...tracker.panicTriggers], + panicRecoverySuppressionUntil: tracker.panicRecoverySuppressionUntil > Date.now() + ? 
new Date(tracker.panicRecoverySuppressionUntil).toISOString() + : undefined, + agentId, + sessionId, + revision: tracker.panicRevision, + }; +} diff --git a/src/core/services/mcp-handlers/gryph-bridge.test.ts b/src/core/services/mcp-handlers/gryph-bridge.test.ts new file mode 100644 index 00000000..6f12b5e8 --- /dev/null +++ b/src/core/services/mcp-handlers/gryph-bridge.test.ts @@ -0,0 +1,376 @@ +/** + * Tests for gryph-bridge.ts — RuntimeBehaviorProvider, GryphBehaviorProvider, + * startGryphPolling lifecycle (single-flight, async isolation, panic state updates, + * tracker sync, provenance attribution, telemetry). + */ + +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { mkdtemp, mkdir } from 'node:fs/promises'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { OPENLORE_DIR } from '../../../constants.js'; +import { + GryphBehaviorProvider, + startGryphPolling, + applyGryphDelta, + queryGryphSignals, + _resetGryphAvailabilityForTesting, +} from './gryph-bridge.js'; +import type { RuntimeBehaviorProvider, RuntimeBehaviorSnapshot } from './gryph-bridge.js'; +import { readPanicState, writePanicState, defaultPanicState } from './panic-response.js'; +import type { EpistemicTracker } from './epistemic-lease.js'; +import { + GRYPH_RETRY_BURST_DELTA, + GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA, + GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA, + GRYPH_POLL_INTERVAL_MS, + PANIC_DECAY_PER_MIN, +} from './panic-constants.js'; + +// ============================================================================ +// Helpers +// ============================================================================ + +function makeTracker(overrides: Partial = {}): EpistemicTracker { + return { + lastOrientAt: new Date(), + graphVersionAtOrient: 'abc', + cogLoad: 0, + freshnessState: 'fresh', + staleDepth: 0, + recentModules: [], + density: 0, + oscillation: 0, + localityConfidence: 1, + panicScore: 0, + panicLevel: 0, + recentOrientCount: 0, + 
lastOrientResetAt: 0, + interventionCountSinceStable: 0, + lastPanicUpdateAt: 0, + panicTriggers: [], + panicRecoverySuppressionUntil: 0, + panicRevision: 0, + ...overrides, + } as EpistemicTracker; +} + +class FixedProvider implements RuntimeBehaviorProvider { + constructor(private snapshot: RuntimeBehaviorSnapshot | null) {} + async collect(_since: string): Promise { + return this.snapshot; + } +} + +class CountingProvider implements RuntimeBehaviorProvider { + calls = 0; + snapshots: Array = []; + constructor(private responses: Array = []) {} + async collect(_since: string): Promise { + this.calls++; + const snap = this.responses.shift() ?? null; + this.snapshots.push(snap); + return snap; + } +} + +class SlowProvider implements RuntimeBehaviorProvider { + running = 0; + maxConcurrent = 0; + async collect(_since: string): Promise { + this.running++; + this.maxConcurrent = Math.max(this.maxConcurrent, this.running); + await new Promise(r => setTimeout(r, 50)); + this.running--; + return null; + } +} + +// ============================================================================ +// applyGryphDelta — backward compat path +// ============================================================================ + +describe('applyGryphDelta', () => { + it('retry burst adds delta', () => { + const triggers: string[] = []; + const score = applyGryphDelta(0, { commandEntropy: 0.1, repetitiveRetryBurst: true, largePatchWhileStale: false, largePatchLoc: 0 }, false, triggers); + expect(score).toBe(GRYPH_RETRY_BURST_DELTA); + expect(triggers).toContain('repetitive_retry_burst'); + }); + + it('large patch while stale — low entropy applies heavy delta', () => { + const triggers: string[] = []; + const score = applyGryphDelta(0, { commandEntropy: 0.1, repetitiveRetryBurst: false, largePatchWhileStale: true, largePatchLoc: 600 }, true, triggers); + expect(score).toBe(GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA); + expect(triggers).toContain('large_patch_stale'); + }); + + it('large patch 
while stale — high entropy attenuated', () => { + const triggers: string[] = []; + const score = applyGryphDelta(0, { commandEntropy: 0.8, repetitiveRetryBurst: false, largePatchWhileStale: true, largePatchLoc: 600 }, true, triggers); + expect(score).toBe(GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA); + expect(triggers).toContain('large_patch_attenuated'); + }); + + it('large patch NOT stale — no delta', () => { + const triggers: string[] = []; + const score = applyGryphDelta(0, { commandEntropy: 0.1, repetitiveRetryBurst: false, largePatchWhileStale: true, largePatchLoc: 600 }, false, triggers); + expect(score).toBe(0); + }); + + it('clamps at 100', () => { + const score = applyGryphDelta(95, { commandEntropy: 0.1, repetitiveRetryBurst: true, largePatchWhileStale: true, largePatchLoc: 600 }, true, []); + expect(score).toBe(100); + }); +}); + +// ============================================================================ +// GryphBehaviorProvider — mocked child_process +// ============================================================================ + +describe('GryphBehaviorProvider', () => { + it('returns null when gryph not available', async () => { + vi.mock('node:child_process', () => ({ + spawnSync: vi.fn(() => ({ status: 1, stdout: null })), + spawn: vi.fn(), + })); + const provider = new GryphBehaviorProvider(); + const result = await provider.collect(new Date().toISOString()); + // may return null (gryph unavailable) or a snapshot — just must not throw + expect(result === null || typeof result === 'object').toBe(true); + vi.restoreAllMocks(); + }); +}); + +// ============================================================================ +// queryGryphSignals — backward compat +// ============================================================================ + +describe('queryGryphSignals', () => { + it('returns null when gryph unavailable', () => { + _resetGryphAvailabilityForTesting(false); + const result = queryGryphSignals(new Date().toISOString()); + 
expect(result).toBeNull(); + }); +}); + +// ============================================================================ +// startGryphPolling — lifecycle +// ============================================================================ + +describe('startGryphPolling', () => { + let dir: string; + + beforeEach(async () => { + vi.useFakeTimers(); + dir = await mkdtemp(join(tmpdir(), 'gryph-test-')); + await mkdir(join(dir, OPENLORE_DIR, 'telemetry'), { recursive: true }); + }); + + afterEach(() => { + vi.useRealTimers(); + vi.restoreAllMocks(); + }); + + it('calls provider after first interval', async () => { + const provider = new CountingProvider([null]); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + expect(provider.calls).toBe(0); + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS); + expect(provider.calls).toBe(1); + + stop(); + }); + + it('stops polling after cleanup call', async () => { + const provider = new CountingProvider([null, null, null]); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS); + stop(); + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS * 3); + expect(provider.calls).toBe(1); + }); + + it('single-flight: overlapping poll skipped', async () => { + const slow = new SlowProvider(); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider: slow }); + + // Fire two intervals while first poll is still running (50ms delay) + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS); + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS); + await vi.advanceTimersByTimeAsync(100); // let slow poll finish + + expect(slow.maxConcurrent).toBe(1); + stop(); + }); + + it('null snapshot — no panic state written', async () => { + const provider = new 
FixedProvider(null); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + // panic-state.json should not exist (no prior state) + const state = readPanicState(dir); + expect(state.panicScore).toBe(0); + }); + + it('snapshot with no actionable signals — no state update', async () => { + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.8, + repetitiveRetryBurst: false, + shellActivity: true, + }; + const provider = new FixedProvider(snapshot); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + const state = readPanicState(dir); + expect(state.panicScore).toBe(0); + }); + + it('retry burst signal — updates panic state and syncs tracker', async () => { + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.1, + repetitiveRetryBurst: true, + shellActivity: true, + }; + const provider = new FixedProvider(snapshot); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + const state = readPanicState(dir); + expect(state.panicScore).toBe(GRYPH_RETRY_BURST_DELTA); + expect(tracker.panicScore).toBe(GRYPH_RETRY_BURST_DELTA); + }); + + it('large patch while stale — updates panic state', async () => { + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.1, + repetitiveRetryBurst: false, + largePatchWhileStale: { loc: 800, entropy: 0.1 }, + }; + const provider = new FixedProvider(snapshot); + const tracker = makeTracker({ staleDepth: 2 }); + const stop = startGryphPolling({ directory: dir, getTracker: () => 
tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + const state = readPanicState(dir); + expect(state.panicScore).toBe(GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA); + }); + + it('large patch NOT stale — no delta', async () => { + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.1, + repetitiveRetryBurst: false, + largePatchWhileStale: { loc: 800, entropy: 0.1 }, + }; + const provider = new FixedProvider(snapshot); + const tracker = makeTracker({ staleDepth: 0 }); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + const state = readPanicState(dir); + expect(state.panicScore).toBe(0); + }); + + it('provenance carries source:gryph', async () => { + const emitted: unknown[] = []; + vi.spyOn(await import('../telemetry.js'), 'emit').mockImplementation( + (_dir, _domain, payload) => { emitted.push(payload); }, + ); + + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.1, + repetitiveRetryBurst: true, + }; + const provider = new FixedProvider(snapshot); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + const delta = emitted.find( + (e): e is Record => + typeof e === 'object' && e !== null && (e as Record)['event'] === 'panic_score_delta', + ); + expect(delta).toBeDefined(); + expect(delta?.['source']).toBe('gryph'); + const provenance = delta?.['provenance'] as Array>; + expect(provenance?.[0]?.['evidence']).toMatchObject({ source: 'gryph' }); + }); + + it('provider exception — fail-open, no throw', async () => { + const broken: RuntimeBehaviorProvider = { + async collect() { throw new Error('network error'); }, + }; + const tracker = makeTracker(); + const stop = 
startGryphPolling({ directory: dir, getTracker: () => tracker, provider: broken }); + + await expect(vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100)).resolves.not.toThrow(); + stop(); + + expect(tracker.panicScore).toBe(0); + }); + + it('null tracker — still writes panic state', async () => { + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.1, + repetitiveRetryBurst: true, + }; + const provider = new FixedProvider(snapshot); + const stop = startGryphPolling({ directory: dir, getTracker: () => null, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + const state = readPanicState(dir); + expect(state.panicScore).toBe(GRYPH_RETRY_BURST_DELTA); + }); + + it('accumulates score across polls', async () => { + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.1, + repetitiveRetryBurst: true, + }; + const provider = new CountingProvider([snapshot, snapshot]); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS); + stop(); + + // Second poll applies decay for time elapsed since first poll (GRYPH_POLL_INTERVAL_MS). + const decayPerPoll = Math.floor((GRYPH_POLL_INTERVAL_MS / 60_000) * PANIC_DECAY_PER_MIN); + const expected = GRYPH_RETRY_BURST_DELTA * 2 - decayPerPoll; + const state = readPanicState(dir); + expect(state.panicScore).toBe(expected); + expect(tracker.panicScore).toBe(expected); + }); +}); diff --git a/src/core/services/mcp-handlers/gryph-bridge.ts b/src/core/services/mcp-handlers/gryph-bridge.ts new file mode 100644 index 00000000..d727ffc8 --- /dev/null +++ b/src/core/services/mcp-handlers/gryph-bridge.ts @@ -0,0 +1,511 @@ +/** + * Gryph bridge — runtime behavioral observability provider. 
+ * + * Promotes Gryph from optional score enrichment to first-class behavioral source. + * Runs a background poll loop that updates panic state independently of MCP tool + * calls, closing the blind spot where agents work purely via Bash/Edit/Read. + * + * Architecture: + * RuntimeBehaviorProvider (interface) + * └── GryphBehaviorProvider (impl: gryph query CLI) + * └── startGryphPolling (background loop → panic state) + * + * All failures degrade to zero-impact null semantics: + * - gryph binary absent → null + * - timeout → null + * - malformed output → null + * - any exception → null + * + * The poll loop MUST NOT block MCP execution, delay tool responses, or overlap. + */ + +import { spawnSync, spawn } from 'node:child_process'; +import { existsSync } from 'node:fs'; +import { emit } from '../telemetry.js'; +import { readPanicState, writePanicState, casWritePanicState, applyPanicHysteresis } from './panic-response.js'; +import type { PanicState, PanicLevel } from './panic-response.js'; +import type { EpistemicTracker } from './epistemic-lease.js'; +import { + PANIC_SCORE_MAX, + GRYPH_RETRY_BURST_DELTA, + GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA, + GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA, + GRYPH_LARGE_PATCH_LOC_THRESHOLD, + GRYPH_ENTROPY_LOW_THRESHOLD, + GRYPH_ENTROPY_HIGH_THRESHOLD, + GRYPH_FAILING_RATE_THRESHOLD, + PANIC_DECAY_PER_MIN, + GRYPH_POLL_INTERVAL_MS, + GRYPH_POLL_INTERVAL_MIN_MS, +} from './panic-constants.js'; + +// ============================================================================ +// TYPES +// ============================================================================ + +/** Behavioral snapshot from a runtime observability source. 
*/ +export interface RuntimeBehaviorSnapshot { + timestamp: number; + commandEntropy?: number; + repetitiveRetryBurst?: boolean; + failingCommandRate?: number; + largePatchWhileStale?: { loc: number; entropy: number }; + commandCount?: number; + shellActivity?: boolean; +} + +/** Abstraction for runtime behavioral data sources. */ +export interface RuntimeBehaviorProvider { + collect(since: string): Promise; +} + +/** Kept for backward compat with panic-check.ts enrichment path. */ +export interface GryphSignals { + commandEntropy: number; + repetitiveRetryBurst: boolean; + largePatchWhileStale: boolean; + largePatchLoc: number; +} + +interface GryphExecEvent { + // PascalCase — actual Gryph schema + Command?: string; + ExitCode?: number; + ResultStatus?: string; + Timestamp?: string; + // snake_case / camelCase — kept for custom/future sources + command?: string; + cmd?: string; + exit_code?: number; + exitCode?: number; + result_status?: string; +} + +interface GryphWriteEvent { + // PascalCase — actual Gryph schema + Path?: string; + LinesAdded?: number; + LinesRemoved?: number; + Timestamp?: string; + // snake_case / camelCase — kept for custom/future sources + path?: string; + file?: string; + lines?: number; + loc?: number; + additions?: number; +} + +interface SnapshotDeltaResult { + newScore: number; + newLevel: PanicLevel; + provenance: Array<{ name: string; delta: number; evidence: Record }>; +} + +// ============================================================================ +// CONSTANTS +// ============================================================================ + +const GRYPH_TIMEOUT_MS = Math.max(50, Number(process.env['OPENLORE_GRYPH_TIMEOUT_MS'] ?? 
150)); +const GRYPH_DETECT_TIMEOUT_MS = 50; + +// ============================================================================ +// ENTROPY COMPUTATION +// ============================================================================ + +function computeCommandEntropy(commands: string[]): number { + if (commands.length === 0) return 1; + const counts = new Map(); + for (const cmd of commands) { + const key = cmd.trim().split(/\s+/)[0] ?? cmd; + counts.set(key, (counts.get(key) ?? 0) + 1); + } + const n = commands.length; + let entropy = 0; + for (const count of counts.values()) { + const p = count / n; + entropy -= p * Math.log2(p); + } + const maxEntropy = Math.log2(Math.max(counts.size, 1)); + return maxEntropy > 0 ? Math.min(1, entropy / maxEntropy) : 1; +} + +// ============================================================================ +// GRYPH DETECTION +// ============================================================================ + +let _gryphAvailable: boolean | undefined; +let _gryphBin = 'gryph'; + +/** Reset availability cache — for testing only. */ +export function _resetGryphAvailabilityForTesting(available = false): void { + _gryphAvailable = available; + _gryphBin = 'gryph'; +} + +function isGryphAvailable(): boolean { + if (_gryphAvailable !== undefined) return _gryphAvailable; + // Try PATH-resolution first (fast, works in interactive shells) + const result = spawnSync('which', ['gryph'], { + timeout: GRYPH_DETECT_TIMEOUT_MS, + stdio: ['ignore', 'pipe', 'ignore'], + }); + const fromPath = result.status === 0 ? result.stdout?.toString().trim() : ''; + if (fromPath) { + _gryphBin = fromPath; + _gryphAvailable = true; + return true; + } + // Fallback: check common install locations (hook environments often have restricted PATH) + const home = process.env['HOME'] ?? 
''; + const candidates = [ + `${home}/.local/bin/gryph`, + `${home}/go/bin/gryph`, + '/usr/local/bin/gryph', + '/opt/homebrew/bin/gryph', + ]; + for (const p of candidates) { + if (existsSync(p)) { _gryphBin = p; _gryphAvailable = true; return true; } + } + _gryphAvailable = false; + return false; +} + +// ============================================================================ +// QUERY HELPERS +// ============================================================================ + +/** Synchronous query — used by the backward-compat panic-check enrichment path. */ +function queryGryphSync(action: 'exec' | 'write', since: string): unknown[] { + const result = spawnSync( + _gryphBin, + ['query', '--format', 'json', '--action', action, '--since', since], + { timeout: GRYPH_TIMEOUT_MS, stdio: ['ignore', 'pipe', 'ignore'], encoding: 'utf-8' }, + ); + if (result.status !== 0 || !result.stdout) return []; + try { + const parsed = JSON.parse(result.stdout.trim()); + return Array.isArray(parsed) ? parsed : []; + } catch { + return []; + } +} + +/** Async query — used by GryphBehaviorProvider polling path (non-blocking). */ +async function queryGryphAsync(action: 'exec' | 'write', since: string): Promise { + return new Promise((resolve) => { + const child = spawn( + _gryphBin, + ['query', '--format', 'json', '--action', action, '--since', since], + { stdio: ['ignore', 'pipe', 'ignore'] }, + ); + const timer = setTimeout(() => { child.kill(); resolve([]); }, GRYPH_TIMEOUT_MS); + let output = ''; + child.stdout.on('data', (chunk: Buffer) => { output += chunk.toString(); }); + child.on('close', (code) => { + clearTimeout(timer); + if (code !== 0 || !output) { resolve([]); return; } + try { + const parsed = JSON.parse(output.trim()); + resolve(Array.isArray(parsed) ? 
parsed : []); + } catch { + resolve([]); + } + }); + child.on('error', () => { clearTimeout(timer); resolve([]); }); + }); +} + +// ============================================================================ +// SNAPSHOT DELTA — applies RuntimeBehaviorSnapshot to a panic state +// ============================================================================ + +function applySnapshotDelta( + snapshot: RuntimeBehaviorSnapshot, + state: PanicState, + staleDepth: number, +): SnapshotDeltaResult { + const now = Date.now(); + const elapsedMin = state.updatedAt + ? Math.max(0, (now - new Date(state.updatedAt).getTime()) / 60_000) + : 0; + const decayDelta = -Math.floor(elapsedMin * PANIC_DECAY_PER_MIN); + + let delta = decayDelta; + const provenance: SnapshotDeltaResult['provenance'] = []; + if (decayDelta < 0) { + provenance.push({ name: 'passive_decay', delta: decayDelta, evidence: { elapsed_min: Math.round(elapsedMin * 100) / 100 } }); + } + + const isStale = staleDepth >= 2; + + if (snapshot.repetitiveRetryBurst) { + delta += GRYPH_RETRY_BURST_DELTA; + provenance.push({ + name: 'gryph_retry_burst', + delta: GRYPH_RETRY_BURST_DELTA, + evidence: { source: 'gryph', entropy: snapshot.commandEntropy ?? null }, + }); + } + + if (snapshot.largePatchWhileStale && isStale) { + const { loc, entropy } = snapshot.largePatchWhileStale; + const attenuated = entropy > GRYPH_ENTROPY_HIGH_THRESHOLD; + const d = attenuated ? 
GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA : GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA; + delta += d; + provenance.push({ + name: 'large_patch_while_stale', + delta: d, + evidence: { source: 'gryph', loc, entropy }, + }); + } + + if (delta === 0 || (delta === decayDelta && state.panicScore === 0)) { + return { newScore: state.panicScore, newLevel: state.panicLevel, provenance: [] }; + } + + const newScore = Math.min(PANIC_SCORE_MAX, Math.max(0, state.panicScore + delta)); + const newLevel = applyPanicHysteresis(state.panicLevel, newScore, staleDepth); + return { newScore, newLevel, provenance }; +} + +// ============================================================================ +// GryphBehaviorProvider — RuntimeBehaviorProvider implementation +// ============================================================================ + +export class GryphBehaviorProvider implements RuntimeBehaviorProvider { + async collect(since: string): Promise { + try { + if (!isGryphAvailable()) return null; + + const [execEvents, writeEvents] = await Promise.all([ + queryGryphAsync('exec', since) as Promise, + queryGryphAsync('write', since) as Promise, + ]); + + const commands = (execEvents as GryphExecEvent[]) + .map(e => e.Command ?? e.command ?? e.cmd ?? '') + .filter(Boolean); + const commandEntropy = computeCommandEntropy(commands); + + const failingCount = (execEvents as GryphExecEvent[]) + .filter(e => { + const status = e.ResultStatus ?? e.result_status; + return status === 'error' || (e.ExitCode ?? e.exit_code ?? e.exitCode ?? 0) !== 0; + }).length; + const failingCommandRate = execEvents.length > 0 ? failingCount / execEvents.length : 0; + // Low entropy + any failure (pure retry loop) OR high failure rate regardless of entropy + const repetitiveRetryBurst = + (commandEntropy < GRYPH_ENTROPY_LOW_THRESHOLD && failingCount > 0) || + failingCommandRate > GRYPH_FAILING_RATE_THRESHOLD; + + const locs = (writeEvents as GryphWriteEvent[]).map( + e => e.LinesAdded ?? e.lines ?? e.loc ?? 
e.additions ?? 0,
+      );
+      const maxLoc = locs.length > 0 ? Math.max(...locs) : 0;
+
+      return {
+        timestamp: Date.now(),
+        commandEntropy,
+        repetitiveRetryBurst,
+        failingCommandRate,
+        largePatchWhileStale: maxLoc > GRYPH_LARGE_PATCH_LOC_THRESHOLD
+          ? { loc: maxLoc, entropy: commandEntropy }
+          : undefined,
+        commandCount: commands.length,
+        shellActivity: execEvents.length > 0,
+      };
+    } catch {
+      return null;
+    }
+  }
+}
+
+// ============================================================================
+// POLLING LIFECYCLE
+// ============================================================================
+
+export interface GryphPollingOptions {
+  directory: string;
+  /** Returns the in-memory tracker (read for staleDepth; panic fields are synced after a write), or null. */
+  getTracker: () => EpistemicTracker | null;
+  /** Optional provider override (for testing). */
+  provider?: RuntimeBehaviorProvider;
+}
+
+/** One active poller per workspace directory — enforced by startGryphPolling. */
+const _pollerRegistry = new Map<string, () => void>();
+
+/**
+ * Start background Gryph polling. Returns a cleanup function (call on shutdown).
+ *
+ * Invariants:
+ * - One per workspace: registry stops any existing poller for the same directory
+ * - Never overlaps: single-flight protection skips polls while previous is running
+ * - Never blocks: async spawn, isolated from MCP execution path
+ * - Never throws: all errors caught, fail-open (a malformed interval override falls back to the default)
+ * - CAS writes: uses compare-and-swap to prevent overwriting concurrent MCP writes
+ * - Syncs tracker: panicScore/panicLevel/panicRevision updated in-memory after write
+ *   so the MCP path doesn't overwrite Gryph-elevated state on the next tool call
+ */
+export function startGryphPolling(opts: GryphPollingOptions): () => void {
+  const { directory, getTracker, provider = new GryphBehaviorProvider() } = opts;
+
+  // Enforce one-per-workspace: stop any existing poller for this directory
+  _pollerRegistry.get(directory)?.();
+
+  // Malformed override → NaN/Infinity makes toISOString() below throw; setTimeout coerces NaN and >2^31-1 to 1 ms.
+  const envMs = Number(process.env['OPENLORE_GRYPH_POLL_INTERVAL_MS'] ?? GRYPH_POLL_INTERVAL_MS);
+  const requestedMs = Number.isFinite(envMs) ? envMs : GRYPH_POLL_INTERVAL_MS;
+  const intervalMs = Math.min(2_147_483_647, Math.max(GRYPH_POLL_INTERVAL_MIN_MS, requestedMs));
+
+  let isPolling = false;
+  let lastPollAt = new Date(Date.now() - intervalMs).toISOString();
+  let stopped = false;
+
+  const poll = async (): Promise<void> => {
+    if (isPolling) return;
+    isPolling = true;
+    try {
+      const since = lastPollAt;
+      lastPollAt = new Date().toISOString();
+
+      const snapshot = await provider.collect(since);
+
+      emit(directory, 'panic', {
+        event: 'gryph_poll',
+        success: snapshot !== null,
+        shell_activity: snapshot?.shellActivity ?? false,
+      });
+
+      if (!snapshot) return;
+
+      // No actionable signals — skip state update
+      if (!snapshot.repetitiveRetryBurst && !snapshot.largePatchWhileStale) return;
+
+      const tracker = getTracker();
+      const staleDepth = tracker?.staleDepth ?? 0;
+
+      // CAS write with one retry on conflict (MCP may write between our read and write).
+      // All ops inside casWritePanicState are synchronous — atomic within the Node.js event loop.
+      let readState = readPanicState(directory);
+      let applyResult = applySnapshotDelta(snapshot, readState, staleDepth);
+      if (applyResult.newScore === readState.panicScore && applyResult.newLevel === readState.panicLevel) return;
+
+      for (let attempt = 0; attempt < 2; attempt++) {
+        const candidate: PanicState = {
+          ...readState,
+          panicScore: applyResult.newScore,
+          panicLevel: applyResult.newLevel,
+          updatedAt: new Date().toISOString(),
+          triggers: [...(readState.triggers ?? []), ...applyResult.provenance.map(p => p.name)],
+        };
+        if (casWritePanicState(directory, readState.revision, candidate)) {
+          const writtenRevision = readState.revision + 1;
+          // Sync in-memory tracker so MCP path doesn't overwrite with stale state
+          if (tracker) {
+            tracker.panicScore = applyResult.newScore;
+            tracker.panicLevel = applyResult.newLevel as PanicLevel;
+            tracker.panicRevision = writtenRevision;
+          }
+          emit(directory, 'panic', {
+            event: 'panic_score_delta',
+            source: 'gryph',
+            delta: applyResult.newScore - readState.panicScore,
+            from_score: readState.panicScore,
+            to_score: applyResult.newScore,
+            from_level: readState.panicLevel,
+            to_level: applyResult.newLevel,
+            provenance: applyResult.provenance,
+          });
+          return;
+        }
+        // Conflict on first attempt — re-read and retry once
+        if (attempt === 0) {
+          readState = readPanicState(directory);
+          applyResult = applySnapshotDelta(snapshot, readState, staleDepth);
+          if (applyResult.newScore === readState.panicScore && applyResult.newLevel === readState.panicLevel) return;
+        }
+      }
+      // Both CAS attempts failed — skip this poll cycle, try again next interval
+    } catch {
+      // fail-open: no error propagates
+    } finally {
+      isPolling = false;
+    }
+  };
+
+  // While loop: sleep-before-poll preserves "first poll after one interval" semantics.
+  // Sequential await eliminates setInterval's timer drift and stop lifecycle races.
+  const run = async (): Promise<void> => {
+    while (!stopped) {
+      await new Promise(r => setTimeout(r, intervalMs));
+      if (!stopped) await poll();
+    }
+  };
+  void run();
+
+  const stop = (): void => {
+    stopped = true;
+    if (_pollerRegistry.get(directory) === stop) _pollerRegistry.delete(directory); // stale handle must not evict a newer poller
+  };
+  _pollerRegistry.set(directory, stop);
+  return stop;
+}
+
+// ============================================================================
+// BACKWARD COMPAT — panic-check.ts enrichment path (sync, pre-existing)
+// ============================================================================
+
+/**
+ * Synchronous Gryph query for the panic-check hook enrichment path.
+ * Returns null when Gryph is absent or any error occurs.
+ */
+export function queryGryphSignals(since: string): GryphSignals | null {
+  try {
+    if (!isGryphAvailable()) return null;
+
+    const execEvents = queryGryphSync('exec', since) as GryphExecEvent[];
+    const writeEvents = queryGryphSync('write', since) as GryphWriteEvent[];
+
+    const commands = execEvents.map(e => e.Command ?? e.command ?? e.cmd ?? '').filter(Boolean);
+    const commandEntropy = computeCommandEntropy(commands);
+    const hasFailures = execEvents.some(e => {
+      const status = e.ResultStatus ?? e.result_status;
+      return status === 'error' || (e.ExitCode ?? e.exit_code ?? e.exitCode ?? 0) !== 0;
+    });
+    const repetitiveRetryBurst = commandEntropy < GRYPH_ENTROPY_LOW_THRESHOLD && hasFailures;
+
+    const locs = writeEvents.map(e => e.LinesAdded ?? e.lines ?? e.loc ?? e.additions ?? 0);
+    const largePatchLoc = locs.length > 0 ? Math.max(...locs) : 0;
+    const largePatchWhileStale = largePatchLoc > GRYPH_LARGE_PATCH_LOC_THRESHOLD;
+
+    return { commandEntropy, repetitiveRetryBurst, largePatchWhileStale, largePatchLoc };
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Apply Gryph-derived score deltas (backward compat — panic-check enrichment).
+ */
+export function applyGryphDelta(
+  baseScore: number,
+  signals: GryphSignals,
+  isStale: boolean,
+  triggers: string[],
+): number {
+  let delta = 0;
+
+  if (signals.repetitiveRetryBurst) {
+    delta += GRYPH_RETRY_BURST_DELTA;
+    triggers.push('repetitive_retry_burst');
+  }
+
+  if (signals.largePatchWhileStale && isStale) {
+    const attenuated = signals.commandEntropy > GRYPH_ENTROPY_HIGH_THRESHOLD;
+    delta += attenuated ? GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA : GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA;
+    triggers.push(attenuated ? 'large_patch_attenuated' : 'large_patch_stale');
+  }
+
+  return Math.min(PANIC_SCORE_MAX, Math.max(0, baseScore + delta));
+}
diff --git a/src/core/services/mcp-handlers/panic-constants.ts b/src/core/services/mcp-handlers/panic-constants.ts
new file mode 100644
index 00000000..5c9ccc7d
--- /dev/null
+++ b/src/core/services/mcp-handlers/panic-constants.ts
@@ -0,0 +1,130 @@
+/**
+ * Panic Response Layer — centralized constants.
+ *
+ * Single source of truth for all numeric thresholds, weights, cooldowns, and
+ * timing values used across the panic subsystem (panic-response.ts,
+ * epistemic-lease.ts). Exported so tests can reference these values directly
+ * rather than hard-coding snapshots that silently diverge.
+ */
+
+import type { PanicLevel } from './panic-response.js';
+import type { PanicCheckOutput } from './panic-response.js';
+
+// ============================================================================
+// HYSTERESIS THRESHOLDS
+// ============================================================================
+
+/** Score required to transition upward from level N to N+1. */
+export const PANIC_UP_THRESHOLD: Record<number, number> = {
+  0: 30,
+  1: 50,
+  2: 70,
+  3: 90,
+};
+
+/** Score below which level N drops to N−1. Separate from UP to prevent thrashing. */
+export const PANIC_DOWN_THRESHOLD: Record<number, number> = {
+  1: 20,
+  2: 40,
+  3: 60,
+  4: 80,
+};
+
+// ============================================================================
+// SIGNAL WEIGHTS
+// ============================================================================
+
+/** Trajectory burst signal: density ≥ threshold fires this delta. */
+export const PANIC_TRAJECTORY_DENSITY = 0.60;
+export const PANIC_TRAJECTORY_DELTA = 15;
+
+/** Oscillation spike signal: oscillation ≥ threshold fires this delta. */
+export const PANIC_OSCILLATION_THRESHOLD = 0.50;
+export const PANIC_OSCILLATION_DELTA = 10;
+
+/** Stale-depth-3 persistence signal (gated by localityConfidence < threshold). */
+export const PANIC_STALE_D3_LOCALITY_GATE = 0.5;
+export const PANIC_STALE_D3_DELTA = 25;
+
+/** Locality recovery: per-call score reduction when agent is stable. */
+export const PANIC_LOCALITY_RECOVERY = 3;
+
+/** Passive wall-clock decay: score reduction per elapsed minute. */
+export const PANIC_DECAY_PER_MIN = 5;
+
+/** Hard ceiling on panic score. */
+export const PANIC_SCORE_MAX = 100;
+
+// ============================================================================
+// TIMING
+// ============================================================================
+
+/** Post-orient() refractory window — upward signals suppressed for this long. */
+export const PANIC_REFRACTORY_MS = 45_000;
+
+/** Session expiry — panic state older than this is discarded on read. */
+export const PANIC_SESSION_EXPIRY_MS = 30 * 60 * 1000;
+
+// ============================================================================
+// HOOK COOLDOWNS
+// ============================================================================
+
+/**
+ * Minimum ms between hook interventions per panic level.
+ * Prevents context saturation and habituation from repeated injection.
+ * L4 = 0: every tool call warned at critical level.
+ */
+export const HOOK_COOLDOWN_MS: Record<PanicLevel, number> = {
+  0: 0,
+  1: 120_000,
+  2: 60_000,
+  3: 30_000,
+  4: 0,
+};
+
+// ============================================================================
+// GRYPH SIGNAL WEIGHTS
+// ============================================================================
+
+/** Repetitive retry burst (low entropy + failing commands). */
+export const GRYPH_RETRY_BURST_DELTA = 15;
+
+/** Large patch while stale, low command entropy (non-deliberate). */
+export const GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA = 30;
+
+/** Large patch while stale, high command entropy (deliberate refactor — attenuated). */
+export const GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA = 10;
+
+/** LOC threshold for "large patch" classification. */
+export const GRYPH_LARGE_PATCH_LOC_THRESHOLD = 500;
+
+/** Command entropy below this = low-diversity / retry loop. */
+export const GRYPH_ENTROPY_LOW_THRESHOLD = 0.30;
+
+/** Command entropy above this = deliberate exploratory work (attenuation gate). */
+export const GRYPH_ENTROPY_HIGH_THRESHOLD = 0.60;
+
+/** Failure rate above this triggers burst signal regardless of entropy (mixed-window robustness). */
+export const GRYPH_FAILING_RATE_THRESHOLD = 0.30;
+
+// ============================================================================
+// GRYPH POLLING
+// ============================================================================
+
+/** Default poll interval for background Gryph behavioral ingestion. */
+export const GRYPH_POLL_INTERVAL_MS = 15_000;
+
+/** Minimum allowed poll interval (env override floor). */
+export const GRYPH_POLL_INTERVAL_MIN_MS = 5_000;
+
+// ============================================================================
+// SEVERITY MAP
+// ============================================================================
+
+export const SEVERITY_MAP: Record<PanicLevel, PanicCheckOutput['severity']> = {
+  0: undefined,
+  1: 'elevated',
+  2: 'panic',
+  3: 'scope',
+  4: 'critical',
+};
diff --git a/src/core/services/mcp-handlers/panic-response.test.ts b/src/core/services/mcp-handlers/panic-response.test.ts
new file mode 100644
index 00000000..b585a49f
--- /dev/null
+++ b/src/core/services/mcp-handlers/panic-response.test.ts
@@ -0,0 +1,251 @@
+/**
+ * Tests for panic-response.ts
+ * - applyPanicHysteresis
+ * - readPanicState / writePanicState
+ * - buildPanicCheckOutput
+ * - getPanicSignalText
+ */
+
+import { describe, it, expect, beforeEach } from 'vitest';
+import { mkdtemp, mkdir, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import {
+  applyPanicHysteresis,
+  defaultPanicState,
+  readPanicState,
+  writePanicState,
+  casWritePanicState,
+  buildPanicCheckOutput,
+  getPanicSignalText,
+} from './panic-response.js';
+import type { PanicState, PanicLevel } from './panic-response.js';
+import {
+  PANIC_UP_THRESHOLD,
+  PANIC_DOWN_THRESHOLD,
+  HOOK_COOLDOWN_MS,
+  PANIC_SESSION_EXPIRY_MS,
+} from './panic-constants.js';
+import { OPENLORE_DIR } from '../../../constants.js';
+
+// ============================================================================
+// applyPanicHysteresis
+// ============================================================================
+
+describe('applyPanicHysteresis', () => {
+  it('stays 0 below up-threshold', () => {
+    expect(applyPanicHysteresis(0, PANIC_UP_THRESHOLD[0] - 1, 0)).toBe(0);
+  });
+
+  it('transitions 0→1 at up-threshold', () => {
+    expect(applyPanicHysteresis(0, PANIC_UP_THRESHOLD[0], 0)).toBe(1);
+  });
+
+  it('transitions 1→2 at up-threshold', () => {
+    expect(applyPanicHysteresis(1,
PANIC_UP_THRESHOLD[1], 0)).toBe(2); + }); + + it('transitions 2→3 at up-threshold', () => { + expect(applyPanicHysteresis(2, PANIC_UP_THRESHOLD[2], 0)).toBe(3); + }); + + it('L3→L4 requires staleDepth ≥ 3', () => { + expect(applyPanicHysteresis(3, PANIC_UP_THRESHOLD[3], 2)).toBe(3); + expect(applyPanicHysteresis(3, PANIC_UP_THRESHOLD[3], 3)).toBe(4); + }); + + it('does not downgrade when score above down-threshold', () => { + expect(applyPanicHysteresis(2, PANIC_DOWN_THRESHOLD[2] + 1, 0)).toBe(2); + }); + + it('downgrade 2→1 when score below down-threshold', () => { + expect(applyPanicHysteresis(2, PANIC_DOWN_THRESHOLD[2] - 1, 0)).toBe(1); + }); + + it('downgrade 3→2 when score below down-threshold', () => { + expect(applyPanicHysteresis(3, PANIC_DOWN_THRESHOLD[3] - 1, 0)).toBe(2); + }); + + it('no simultaneous up and down transition', () => { + expect(applyPanicHysteresis(0, PANIC_UP_THRESHOLD[0], 0)).toBe(1); + }); + + it('panic ceiling: staleDepth ≥ 3 floors minimum at L2', () => { + // even score 0 → at least L2 when staleDepth=3 + expect(applyPanicHysteresis(0, 0, 3)).toBe(2); + }); + + it('panic ceiling: staleDepth ≥ 2 floors minimum at L1', () => { + expect(applyPanicHysteresis(0, 0, 2)).toBe(1); + }); + + it('panic ceiling: staleDepth 0 no floor', () => { + expect(applyPanicHysteresis(0, 0, 0)).toBe(0); + }); + + it('L4 stays at L4 — no upward beyond max', () => { + expect(applyPanicHysteresis(4, 100, 3)).toBe(4); + }); +}); + +// ============================================================================ +// readPanicState / writePanicState +// ============================================================================ + +describe('readPanicState', () => { + let dir: string; + + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), 'openlore-panic-test-')); + await mkdir(join(dir, OPENLORE_DIR), { recursive: true }); + }); + + it('returns defaultPanicState when file missing (fail-open)', () => { + const state = readPanicState(dir); + 
expect(state.panicLevel).toBe(0); + expect(state.panicScore).toBe(0); + expect(state.schemaVersion).toBe(1); + }); + + it('returns defaultPanicState on parse error (fail-open)', async () => { + await writeFile(join(dir, OPENLORE_DIR, 'panic-state.json'), 'not-json', 'utf-8'); + const state = readPanicState(dir); + expect(state.panicLevel).toBe(0); + }); + + it('returns defaultPanicState on wrong schema version (fail-open)', async () => { + const bad = JSON.stringify({ schemaVersion: 99, panicScore: 80, panicLevel: 3, updatedAt: new Date().toISOString() }); + await writeFile(join(dir, OPENLORE_DIR, 'panic-state.json'), bad, 'utf-8'); + const state = readPanicState(dir); + expect(state.panicLevel).toBe(0); + }); + + it('returns defaultPanicState when session expired', async () => { + const old = new Date(Date.now() - PANIC_SESSION_EXPIRY_MS - 60_000).toISOString(); + const expired: PanicState = { ...defaultPanicState(), panicScore: 80, panicLevel: 3, updatedAt: old, lastOrientAt: old }; + await writeFile(join(dir, OPENLORE_DIR, 'panic-state.json'), JSON.stringify(expired), 'utf-8'); + const state = readPanicState(dir); + expect(state.panicLevel).toBe(0); + }); + + it('round-trips state within session', () => { + const initial: PanicState = { + ...defaultPanicState(), + panicScore: 55, + panicLevel: 2, + triggers: ['oscillation'], + }; + writePanicState(dir, initial); + const read = readPanicState(dir); + expect(read.panicScore).toBe(55); + expect(read.panicLevel).toBe(2); + expect(read.triggers).toEqual(['oscillation']); + }); +}); + +// ============================================================================ +// buildPanicCheckOutput +// ============================================================================ + +describe('buildPanicCheckOutput', () => { + it('returns allow at level 0', () => { + const out = buildPanicCheckOutput(defaultPanicState()); + expect(out.decision).toBe('allow'); + expect(out.severity).toBeUndefined(); + }); + + it('returns warn at 
level 1 with no prior intervention', () => { + const state: PanicState = { ...defaultPanicState(), panicLevel: 1 }; + const out = buildPanicCheckOutput(state); + expect(out.decision).toBe('warn'); + expect(out.severity).toBe('elevated'); + expect(out.message).toContain('[PANIC:ELEVATED]'); + }); + + it('returns allow when within L1 cooldown', () => { + const recentIntervention = new Date(Date.now() - HOOK_COOLDOWN_MS[1] / 2).toISOString(); + const state: PanicState = { + ...defaultPanicState(), + panicLevel: 1, + lastHookInterventionAt: recentIntervention, + }; + const out = buildPanicCheckOutput(state); + expect(out.decision).toBe('allow'); + }); + + it('returns warn when L1 cooldown expired', () => { + const oldIntervention = new Date(Date.now() - HOOK_COOLDOWN_MS[1] - 10_000).toISOString(); + const state: PanicState = { + ...defaultPanicState(), + panicLevel: 1, + lastHookInterventionAt: oldIntervention, + }; + const out = buildPanicCheckOutput(state); + expect(out.decision).toBe('warn'); + }); + + it('L4 always fires regardless of cooldown', () => { + const recentIntervention = new Date(Date.now() - 1_000).toISOString(); + const state: PanicState = { + ...defaultPanicState(), + panicLevel: 4, + lastHookInterventionAt: recentIntervention, + }; + const out = buildPanicCheckOutput(state); + expect(out.decision).toBe('warn'); + expect(out.severity).toBe('critical'); + }); + + it('switches to directive message at interventionCountSinceStable ≥ 3', () => { + const state: PanicState = { + ...defaultPanicState(), + panicLevel: 2, + interventionCountSinceStable: 3, + }; + const out = buildPanicCheckOutput(state); + expect(out.message).toContain('[PANIC:PLANNING:DIRECTIVE]'); + }); + + it('uses advisory message at interventionCountSinceStable < 3', () => { + const state: PanicState = { + ...defaultPanicState(), + panicLevel: 2, + interventionCountSinceStable: 2, + }; + const out = buildPanicCheckOutput(state); + expect(out.message).toContain('[PANIC:PLANNING]'); + 
expect(out.message).not.toContain('DIRECTIVE'); + }); + + it('severity map: L1→elevated, L2→panic, L3→scope, L4→critical', () => { + const levels: [PanicLevel, string][] = [[1, 'elevated'], [2, 'panic'], [3, 'scope'], [4, 'critical']]; + for (const [level, expected] of levels) { + const state: PanicState = { ...defaultPanicState(), panicLevel: level }; + const out = buildPanicCheckOutput(state); + expect(out.severity).toBe(expected); + } + }); +}); + +// ============================================================================ +// getPanicSignalText +// ============================================================================ + +describe('getPanicSignalText', () => { + it('returns null at level 0', () => { + expect(getPanicSignalText(defaultPanicState())).toBeNull(); + }); + + it('returns advisory text at level 1', () => { + const state: PanicState = { ...defaultPanicState(), panicLevel: 1 }; + const text = getPanicSignalText(state); + expect(text).not.toBeNull(); + expect(text).toContain('[PANIC:ELEVATED]'); + }); + + it('returns directive text when interventionCountSinceStable ≥ 3', () => { + const state: PanicState = { ...defaultPanicState(), panicLevel: 3, interventionCountSinceStable: 3 }; + const text = getPanicSignalText(state); + expect(text).toContain('DIRECTIVE'); + }); +}); diff --git a/src/core/services/mcp-handlers/panic-response.ts b/src/core/services/mcp-handlers/panic-response.ts new file mode 100644 index 00000000..be18ab21 --- /dev/null +++ b/src/core/services/mcp-handlers/panic-response.ts @@ -0,0 +1,238 @@ +/** + * Panic Response Layer — behavioral destabilization detection. + * + * Separate from EpistemicLease (freshness = epistemic authority decay). + * Panic = observable behavioral instability: oscillation, trajectory bursts, + * repeated stale-depth-3 persistence. + * + * State file: .openlore/panic-state.json (atomic writes, fail-open reads). + * Hook consumer: `openlore panic-check` reads this file before every agent tool call. 
+ */ + +import { writeFileSync, renameSync, readFileSync, existsSync } from 'node:fs'; +import { join } from 'node:path'; +import { OPENLORE_DIR } from '../../../constants.js'; +import { + PANIC_UP_THRESHOLD, + PANIC_DOWN_THRESHOLD, + HOOK_COOLDOWN_MS, + SEVERITY_MAP, + PANIC_SESSION_EXPIRY_MS, +} from './panic-constants.js'; + +// ============================================================================ +// TYPES +// ============================================================================ + +export type PanicLevel = 0 | 1 | 2 | 3 | 4; + +export interface PanicState { + schemaVersion: 1; + panicScore: number; + panicLevel: PanicLevel; + updatedAt: string; + lastOrientAt: string; + lastHookInterventionAt?: string; + recentOrientCount: number; + localityConfidence: number; + interventionCountSinceStable: number; + triggers: string[]; + /** ISO — upward signals suppressed until this timestamp after an orient() recovery. */ + panicRecoverySuppressionUntil?: string; + /** ISO — start of the Gryph query window for the panic-check hook path. Advanced on each intervention write. */ + gryphWindowStart?: string; + agentId?: string; + sessionId?: string; + /** Monotonically increasing write counter. Used for CAS by concurrent writers (Gryph poll vs MCP). 
*/ + revision: number; +} + +export interface PanicCheckOutput { + decision: 'allow' | 'warn'; + severity?: 'elevated' | 'panic' | 'scope' | 'critical'; + message?: string; +} + +// ============================================================================ +// CONSTANTS +// ============================================================================ + +const PANIC_STATE_FILE = 'panic-state.json'; + +// ============================================================================ +// HYSTERESIS +// ============================================================================ + +export function applyPanicHysteresis(current: PanicLevel, score: number, staleDepth: number): PanicLevel { + let level = current; + + // Attempt upward transition + if (level < 4) { + if (level === 3) { + // L3→L4 requires both score threshold AND staleDepth ≥ 3 + if (score >= PANIC_UP_THRESHOLD[3] && staleDepth >= 3) level = 4; + } else { + if (score >= PANIC_UP_THRESHOLD[level]) level = (level + 1) as PanicLevel; + } + } + + // Attempt downward transition (only if we did not just go up) + if (level === current && level > 0) { + if (score < PANIC_DOWN_THRESHOLD[level]) level = (level - 1) as PanicLevel; + } + + // Panic ceiling: stale depth floors minimum level + const minLevel: PanicLevel = staleDepth >= 3 ? 2 : staleDepth >= 2 ? 1 : 0; + return Math.max(level, minLevel) as PanicLevel; +} + +// ============================================================================ +// STATE I/O +// ============================================================================ + +export function defaultPanicState(): PanicState { + const now = new Date().toISOString(); + return { + schemaVersion: 1, + panicScore: 0, + panicLevel: 0, + updatedAt: now, + lastOrientAt: now, + recentOrientCount: 0, + localityConfidence: 0, + interventionCountSinceStable: 0, + triggers: [], + revision: 0, + }; +} + +/** + * Reads panic state. 
Fails open on all error paths:
+ * missing file, parse error, wrong schema version, expired session, unparseable updatedAt.
+ */
+export function readPanicState(directory: string): PanicState {
+  try {
+    const path = join(directory, OPENLORE_DIR, PANIC_STATE_FILE);
+    if (!existsSync(path)) return defaultPanicState();
+
+    const raw = readFileSync(path, 'utf-8');
+    const parsed = JSON.parse(raw) as Partial<PanicState>;
+
+    if (parsed.schemaVersion !== 1) return defaultPanicState();
+
+    // Session hard reset: zombie state (expired, or a timestamp that parses to NaN) must not leak
+    if (parsed.updatedAt) {
+      const age = Date.now() - new Date(parsed.updatedAt).getTime();
+      if (Number.isNaN(age) || age > PANIC_SESSION_EXPIRY_MS) return defaultPanicState();
+    }
+
+    return { ...defaultPanicState(), ...parsed, schemaVersion: 1, revision: parsed.revision ?? 0 };
+  } catch {
+    return defaultPanicState();
+  }
+}
+
+/**
+ * Atomically writes panic state. POSIX rename(2) is atomic on same filesystem.
+ * Bumps revision on every write — callers sync their own revision counter from the return value.
+ * Never throws — must not crash the hot path.
+ * Returns the new revision written (or the existing revision if write failed).
+ */
+export function writePanicState(directory: string, state: PanicState): number {
+  const newRevision = (state.revision ?? 0) + 1;
+  try {
+    const path = join(directory, OPENLORE_DIR, PANIC_STATE_FILE);
+    const tmp = `${path}.tmp`;
+    writeFileSync(tmp, JSON.stringify({ ...state, revision: newRevision }, null, 2), 'utf-8');
+    renameSync(tmp, path);
+    return newRevision;
+  } catch {
+    // never crash the hot path
+    return state.revision ?? 0;
+  }
+}
+
+/**
+ * Compare-and-swap write for concurrent writers (Gryph poll path).
+ * All ops are synchronous — no await between read and write — so this is atomic
+ * within the Node.js event loop (no interleaving at JS level); NOTE(review): writers in other processes can still interleave — confirm.
+ * Returns false if on-disk revision !== expectedRevision (stale read → caller retries) or if the write throws.
+ */
+export function casWritePanicState(
+  directory: string,
+  expectedRevision: number,
+  state: PanicState,
+): boolean {
+  try {
+    const path = join(directory, OPENLORE_DIR, PANIC_STATE_FILE);
+    const currentRevision = existsSync(path)
+      ? (() => {
+          try { return (JSON.parse(readFileSync(path, 'utf-8')) as Partial<PanicState>).revision ?? 0; }
+          catch { return 0; }
+        })()
+      : 0;
+    if (currentRevision !== expectedRevision) return false;
+    const tmp = `${path}.tmp`;
+    writeFileSync(tmp, JSON.stringify({ ...state, revision: expectedRevision + 1 }, null, 2), 'utf-8');
+    renameSync(tmp, path);
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+// ============================================================================
+// PANIC CHECK OUTPUT (hook response builder)
+// ============================================================================
+
+const ADVISORY_MESSAGES: Record<PanicLevel, string> = {
+  0: '',
+  1: '[PANIC:ELEVATED] Recent navigation suggests increasing architectural uncertainty.\nConsider: summarize current assumptions, identify uncertain dependencies, call orient().',
+  2: '[PANIC:PLANNING] Before cross-module modification, state:\n1. Intended architectural impact 2. Modules affected 3. Rollback strategy\nThen proceed.',
+  3: '[PANIC:SCOPE] Cross-module writes discouraged until orient().\nPrefer local changes. orient() expands operational scope.',
+  4: '[PANIC:CRITICAL] Critical epistemic instability. Call orient() before further modifications.',
+};
+
+const DIRECTIVE_MESSAGES: Record<PanicLevel, string> = {
+  0: '',
+  1: '[PANIC:ELEVATED:DIRECTIVE] Previous checkpoint ignored. Stop and call orient() now.',
+  2: '[PANIC:PLANNING:DIRECTIVE] Previous checkpoint ignored. Stop. Run orient() now before proceeding.',
+  3: '[PANIC:SCOPE:DIRECTIVE] Scope reduction warning ignored. Stop all cross-module writes. Call orient() immediately.',
+  4: '[PANIC:CRITICAL] Critical epistemic instability. 
Call orient() before further modifications.', +}; + +/** + * Builds the structured output for the panic-check CLI hook consumer. + * Always exits 0 — severity encoded in payload, not exit code. + * Applies per-level cooldown: no-ops if intervention fired recently. + */ +export function buildPanicCheckOutput(state: PanicState): PanicCheckOutput { + if (state.panicLevel === 0) return { decision: 'allow' }; + + // Apply cooldown (L4 is exempt — always fires) + if (state.panicLevel < 4 && state.lastHookInterventionAt) { + const elapsed = Date.now() - new Date(state.lastHookInterventionAt).getTime(); + if (elapsed < HOOK_COOLDOWN_MS[state.panicLevel]) return { decision: 'allow' }; + } + + const isDirective = state.interventionCountSinceStable >= 3; + const messages = isDirective ? DIRECTIVE_MESSAGES : ADVISORY_MESSAGES; + const message = messages[state.panicLevel]; + + return { + decision: 'warn', + severity: SEVERITY_MAP[state.panicLevel], + message, + }; +} + +/** + * Returns panic signal text for MCP tool response injection. + * Appended after result (not prepended) to preserve JSON structure. + */ +export function getPanicSignalText(state: PanicState): string | null { + if (state.panicLevel === 0) return null; + const isDirective = state.interventionCountSinceStable >= 3; + const messages = isDirective ? DIRECTIVE_MESSAGES : ADVISORY_MESSAGES; + return messages[state.panicLevel] ?? null; +} diff --git a/src/core/services/telemetry.ts b/src/core/services/telemetry.ts index fdf4c01e..fc93c9a4 100644 --- a/src/core/services/telemetry.ts +++ b/src/core/services/telemetry.ts @@ -4,15 +4,30 @@ * Gate: OPENLORE_TELEMETRY=1 (disabled by default). * Writes append-only JSONL to .openlore/telemetry/.jsonl. * Never throws — telemetry must not crash the hot path. + * + * Rotation: when a domain file exceeds ROTATE_THRESHOLD_BYTES, it is renamed + * to .1.jsonl and older rotated files shifted (keeps MAX_ROTATED_FILES). 
*/ -import { appendFileSync, mkdirSync } from 'node:fs'; +import { appendFileSync, mkdirSync, renameSync, statSync, unlinkSync } from 'node:fs'; import { join } from 'node:path'; import { OPENLORE_DIR } from '../../constants.js'; const TELEMETRY_SUBDIR = 'telemetry'; +const ROTATE_THRESHOLD_BYTES = 50 * 1024 * 1024; // 50 MB +const MAX_ROTATED_FILES = 5; const _createdDirs = new Set(); +function rotateTelemetryFile(filePath: string): void { + // Shift existing rotated files: .5.jsonl deleted, .4 → .5, …, .1 → .2 + const base = filePath.replace(/\.jsonl$/, ''); + try { unlinkSync(`${base}.${MAX_ROTATED_FILES}.jsonl`); } catch { /* not present */ } + for (let i = MAX_ROTATED_FILES - 1; i >= 1; i--) { + try { renameSync(`${base}.${i}.jsonl`, `${base}.${i + 1}.jsonl`); } catch { /* not present */ } + } + try { renameSync(filePath, `${base}.1.jsonl`); } catch { /* rename failed — continue writing */ } +} + /** * Emit a telemetry event to .openlore/telemetry/.jsonl. * @@ -30,8 +45,14 @@ export function emit( try { const dir = join(directory, OPENLORE_DIR, TELEMETRY_SUBDIR); if (!_createdDirs.has(dir)) { mkdirSync(dir, { recursive: true }); _createdDirs.add(dir); } + const filePath = join(dir, `${domain}.jsonl`); + // Rotate before writing if file exceeds threshold + try { + const { size } = statSync(filePath); + if (size >= ROTATE_THRESHOLD_BYTES) rotateTelemetryFile(filePath); + } catch { /* file doesn't exist yet */ } const line = JSON.stringify({ ts: new Date().toISOString(), ...payload }) + '\n'; - appendFileSync(join(dir, `${domain}.jsonl`), line, 'utf-8'); + appendFileSync(filePath, line, 'utf-8'); } catch { // never crash the hot path } diff --git a/src/types/index.ts b/src/types/index.ts index 159d7a00..177b802a 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -5,6 +5,12 @@ // Project detection types export type ProjectType = 'nodejs' | 'python' | 'rust' | 'go' | 'java' | 'ruby' | 'php' | 'unknown'; +// Panic response impact level +// off: panic 
subsystem disabled. Freshness/epistemic tracking always runs regardless. (default) +// observe: panic scoring + state file, no intervention — observe the engine without acting +// advisory / experimental_blocking: full pipeline with response injection / block signal +export type PanicResponseMode = 'off' | 'observe' | 'advisory' | 'experimental_blocking'; + // Configuration types export interface OpenLoreConfig { version: string; @@ -14,6 +20,21 @@ export interface OpenLoreConfig { generation: GenerationConfig; llm?: LLMConfig; embedding?: EmbeddingConfig; + panicResponse?: { + /** + * Controls the panic response subsystem. Default: 'off'. + * + * 'off' disables: panic scoring, panic state persistence, panic interventions, + * panic telemetry, panic hook output. + * Behavioral metrics required by the freshness engine (density, oscillation, + * localityConfidence) continue to be computed in-memory as part of EpistemicLease. + * 'observe': panic scoring + state written, no intervention (collect only). + * 'advisory': full pipeline with L2+ response injection. + * 'experimental_blocking': advisory + runtime-mediated block signal at L4. + * advisory:true is always present in the payload — runtime decides enforcement. + */ + mode: PanicResponseMode; + }; createdAt: string; lastRun: string | null; }