From cdb1d077db2bf92f44ec122ebeb31f2f6f337f54 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Tue, 19 May 2026 22:04:36 +0200 Subject: [PATCH 01/22] feat(panic): implement Panic Response Layer Behavioral destabilization detection orthogonal to EpistemicLease. Panic = oscillation + trajectory bursts + stale-depth persistence. EpistemicLease = freshness/authority decay. Neither implies the other. Architecture: - panic-response.ts: PanicState I/O (atomic writes), hysteresis engine (separate up/down thresholds L0-L4), buildPanicCheckOutput, signal text - epistemic-lease.ts: EpistemicTracker extended with panicScore, panicLevel, localityConfidence, recentOrientCount, interventionCountSinceStable; updatePanic() called on every tool call; orient spam protection in resetTracker() (-40 normal / -15 rapid / 0 at 3+ rapid orients); trackerToPanicState() export - mcp.ts: writes panic-state.json after every updateTracker; appends panic signal as separate content item (never corrupts JSON result body); increments interventionCountSinceStable on injection - panic-check.ts: new CLI command for PreToolUse hook consumers; reads panic-state.json, applies cooldown, always exits 0 - index.ts: registers panic-check command Telemetry (panic.jsonl, gated by OPENLORE_TELEMETRY=1): - panic_level_change: every L0-L4 transition with trigger (score/ceiling) - panic_orient_reset: orient kind (normal/rapid/spam), delta, from/to level - panic_signal_injected: tool, agent, directive_mode flag - hook_intervention: channel, severity, directive_mode - panic_level + panic_score added to every mcp tool_call event Tests: 101 passing (26 new in panic-response.test.ts, 26 new in epistemic-lease.test.ts covering hysteresis, state I/O, fail-open, cooldowns, directive mode, orient spam protection, trackerToPanicState) --- src/cli/commands/mcp.ts | 47 +++- src/cli/commands/panic-check.ts | 43 +++ src/cli/index.ts | 2 + .../mcp-handlers/epistemic-lease.test.ts | 155 ++++++++++- 
.../services/mcp-handlers/epistemic-lease.ts | 126 ++++++++- .../mcp-handlers/panic-response.test.ts | 245 ++++++++++++++++++ .../services/mcp-handlers/panic-response.ts | 202 +++++++++++++++ 7 files changed, 808 insertions(+), 12 deletions(-) create mode 100644 src/cli/commands/panic-check.ts create mode 100644 src/core/services/mcp-handlers/panic-response.test.ts create mode 100644 src/core/services/mcp-handlers/panic-response.ts diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts index ab23c47c..de0124a7 100644 --- a/src/cli/commands/mcp.ts +++ b/src/cli/commands/mcp.ts @@ -31,8 +31,9 @@ import { } from '@modelcontextprotocol/sdk/types.js'; import { sanitizeMcpError, validateDirectory } from '../../core/services/mcp-handlers/utils.js'; -import { createTracker, updateTracker, getFreshnessSignal } from '../../core/services/mcp-handlers/epistemic-lease.js'; +import { createTracker, updateTracker, getFreshnessSignal, trackerToPanicState } from '../../core/services/mcp-handlers/epistemic-lease.js'; import type { EpistemicTracker } from '../../core/services/mcp-handlers/epistemic-lease.js'; +import { writePanicState, getPanicSignalText } from '../../core/services/mcp-handlers/panic-response.js'; import { emit } from '../../core/services/telemetry.js'; import { DEFAULT_DRIFT_MAX_FILES } from '../../constants.js'; import { @@ -1356,7 +1357,10 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { trackerDir = directory; } // Update epistemic state before dispatch (orient resets tracker internally) - if (tracker && directory) updateTracker(tracker, name, directory, typeof filePath === 'string' ? filePath : undefined); + if (tracker && directory) { + updateTracker(tracker, name, directory, typeof filePath === 'string' ? 
filePath : undefined); + writePanicState(directory, trackerToPanicState(tracker, agentName)); + } let result: unknown; @@ -1540,19 +1544,42 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { }; } - emit(directory, 'mcp', { event: 'tool_call', tool: name, ms: Date.now() - _t0, agent: agentName, agent_version: agentVersion }); + emit(directory, 'mcp', { + event: 'tool_call', tool: name, ms: Date.now() - _t0, agent: agentName, agent_version: agentVersion, + panic_level: tracker?.panicLevel ?? 0, + panic_score: tracker?.panicScore ?? 0, + }); const text = typeof result === 'string' ? result : JSON.stringify(result, null, 2); const signal = tracker ? getFreshnessSignal(tracker) : null; - // Freshness signal is a separate content item — never concatenated into - // the result body — so structured outputs (JSON, patches) are not corrupted. - const content: Array<{ type: 'text'; text: string }> = signal - ? signal.prepend - ? [{ type: 'text', text: signal.text }, { type: 'text', text }] - : [{ type: 'text', text }, { type: 'text', text: signal.text }] - : [{ type: 'text', text }]; + // Both freshness and panic signals are separate content items — never + // concatenated into the result body — so structured outputs (JSON, patches) + // are not corrupted. Panic signal always appended (after result). 
+ const content: Array<{ type: 'text'; text: string }> = []; + if (signal?.prepend) content.push({ type: 'text', text: signal.text }); + content.push({ type: 'text', text }); + if (signal && !signal.prepend) content.push({ type: 'text', text: signal.text }); + + if (tracker) { + const panicState = trackerToPanicState(tracker, agentName); + const panicText = getPanicSignalText(panicState); + if (panicText) { + content.push({ type: 'text', text: panicText }); + tracker.interventionCountSinceStable++; + writePanicState(directory, trackerToPanicState(tracker, agentName)); + emit(directory, 'panic', { + event: 'panic_signal_injected', + panic_level: tracker.panicLevel, + panic_score: tracker.panicScore, + intervention_count: tracker.interventionCountSinceStable, + directive_mode: tracker.interventionCountSinceStable >= 3, + tool: name, + agent: agentName, + }); + } + } return { content }; } catch (err) { diff --git a/src/cli/commands/panic-check.ts b/src/cli/commands/panic-check.ts new file mode 100644 index 00000000..ffc6b04c --- /dev/null +++ b/src/cli/commands/panic-check.ts @@ -0,0 +1,43 @@ +/** + * openlore panic-check + * + * Reads panic-state.json and outputs a structured JSON decision for the + * Claude Code PreToolUse hook. Always exits 0 — severity is encoded in + * the payload, not the exit code, so the hook runtime never sees an error. + * + * Designed for minimal startup overhead: imports only node built-ins and + * constants. Heavy MCP dependencies are never loaded. 
+ */ + +import { Command } from 'commander'; +import { readPanicState, writePanicState, buildPanicCheckOutput } from '../../core/services/mcp-handlers/panic-response.js'; +import { emit } from '../../core/services/telemetry.js'; + +export const panicCheckCommand = new Command('panic-check') + .description('Check current panic level (PreToolUse hook consumer)') + .option('-d, --directory ', 'Project directory', process.cwd()) + .action((options: { directory: string }) => { + const dir = options.directory; + const state = readPanicState(dir); + const output = buildPanicCheckOutput(state); + + if (output.decision === 'warn') { + const newCount = state.interventionCountSinceStable + 1; + writePanicState(dir, { + ...state, + lastHookInterventionAt: new Date().toISOString(), + interventionCountSinceStable: newCount, + }); + emit(dir, 'panic', { + event: 'hook_intervention', + channel: 'pre_tool_use', + panic_level: state.panicLevel, + severity: output.severity, + directive_mode: newCount >= 3, + intervention_count: newCount, + }); + } + + process.stdout.write(JSON.stringify(output) + '\n'); + process.exit(0); + }); diff --git a/src/cli/index.ts b/src/cli/index.ts index 10298875..cee2674e 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -25,6 +25,7 @@ import { testCommand } from './commands/test.js'; import { digestCommand } from './commands/digest.js'; import { decisionsCommand } from './commands/decisions.js'; import { telemetryCommand } from './commands/telemetry.js'; +import { panicCheckCommand } from './commands/panic-check.js'; import { configureLogger } from '../utils/logger.js'; // Read version from package.json at runtime so it never drifts from the published version @@ -135,5 +136,6 @@ program.addCommand(testCommand); program.addCommand(digestCommand); program.addCommand(decisionsCommand); program.addCommand(telemetryCommand); +program.addCommand(panicCheckCommand); program.parse(); diff --git a/src/core/services/mcp-handlers/epistemic-lease.test.ts 
b/src/core/services/mcp-handlers/epistemic-lease.test.ts index 7d9ce4f4..44bb32b9 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.test.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.test.ts @@ -3,7 +3,7 @@ */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { createTracker, updateTracker, injectFreshness, getSourceRoots } from './epistemic-lease.js'; +import { createTracker, updateTracker, injectFreshness, getSourceRoots, trackerToPanicState } from './epistemic-lease.js'; import type { EpistemicTracker } from './epistemic-lease.js'; // ============================================================================ @@ -621,3 +621,156 @@ describe('updateTracker — V3.1 cross-module trajectory', () => { expect(t.moduleAccessWindow).toHaveLength(15); }); }); + +// ============================================================================ +// Panic — score accumulation and level transitions +// ============================================================================ + +describe('panic — score and level via updateTracker', () => { + it('starts at panicScore 0, panicLevel 0', () => { + const t = freshTracker(); + expect(t.panicScore).toBe(0); + expect(t.panicLevel).toBe(0); + }); + + it('panicScore increases with oscillation', () => { + const t = freshTracker(); + // Build A→B→A→B oscillation (bigram repetition) driving oscillation score up + for (let i = 0; i < 15; i++) { + const mod = i % 2 === 0 ? 
'auth' : 'billing'; + updateTracker(t, 'search_code', '/fake/repo', `src/${mod}/x.ts`); + } + expect(t.panicScore).toBeGreaterThan(0); + }); + + it('panicLevel rises to 1 when panicScore >= 30', () => { + const t = freshTracker(); + t.panicScore = 29; + // One more call with high density should push it over 30 + t.moduleAccessWindow = ['auth','billing','auth','billing','auth','billing','auth','billing', + 'auth','billing','auth','billing','auth','billing','auth'] as (string|null)[]; + t.lastModule = 'auth'; + updateTracker(t, 'trace_execution_path', '/fake/repo', 'src/billing/x.ts'); + expect(t.panicLevel).toBeGreaterThanOrEqual(1); + }); + + it('staleDepth floors panicLevel via panic ceiling (staleDepth=3 → min L2)', () => { + const t = freshTracker(); + t.panicScore = 0; + // Force stale at depth 3 + t.freshnessState = 'stale'; + t.staleDepth = 3; + updateTracker(t, 'list_spec_domains', '/fake/repo'); + // Panic ceiling: staleDepth≥3 → panicLevel ≥ 2 + expect(t.panicLevel).toBeGreaterThanOrEqual(2); + }); + + it('panicLevel resets interventionCountSinceStable when dropping to 0', () => { + const t = freshTracker(); + t.panicLevel = 1; + t.panicScore = 5; // below down-threshold for L1 (20) → drops to L0 + t.interventionCountSinceStable = 5; + updateTracker(t, 'list_spec_domains', '/fake/repo'); + expect(t.panicLevel).toBe(0); + expect(t.interventionCountSinceStable).toBe(0); + }); + + it('localityConfidence near 1 at low density', () => { + const t = freshTracker(); + updateTracker(t, 'search_code', '/fake/repo'); + expect(t.localityConfidence).toBeGreaterThan(0.9); + }); +}); + +// ============================================================================ +// Panic — orient spam protection +// ============================================================================ + +describe('panic — orient spam protection', () => { + beforeEach(() => { vi.useFakeTimers(); }); + afterEach(() => { vi.useRealTimers(); }); + + it('normal orient (>2min gap) applies -40 
recovery', () => { + const t = freshTracker(); + t.panicScore = 50; + vi.advanceTimersByTime(3 * 60 * 1000); // 3min gap + updateTracker(t, 'orient', '/fake/repo'); + expect(t.panicScore).toBe(10); // 50 - 40 + }); + + it('rapid orient (<2min gap) applies only -15', () => { + const t = freshTracker(); + t.panicScore = 50; + // Simulate a prior orient 30s ago so the next orient is "rapid" + t.lastOrientResetAt = Date.now() - 30_000; + t.recentOrientCount = 1; + updateTracker(t, 'orient', '/fake/repo'); + expect(t.panicScore).toBe(35); // 50 - 15 + }); + + it('3rd+ rapid orient applies 0 recovery (spam)', () => { + const t = freshTracker(); + t.panicScore = 50; + // Simulate 2 prior rapid orients (count already 2) + t.lastOrientResetAt = Date.now() - 30_000; + t.recentOrientCount = 2; + updateTracker(t, 'orient', '/fake/repo'); // count=3 → spam, delta=0 + expect(t.panicScore).toBe(50); // no change + }); + + it('non-rapid orient resets spam counter', () => { + const t = freshTracker(); + t.panicScore = 50; + // Simulate: spam state (2 rapid orients), last orient was 30s ago + t.lastOrientResetAt = Date.now() - 30_000; + t.recentOrientCount = 2; + // Now advance 3min — next orient will be non-rapid + vi.advanceTimersByTime(3 * 60 * 1000); + updateTracker(t, 'orient', '/fake/repo'); // counter reset to 0, +1 = 1, non-rapid → -40 + expect(t.panicScore).toBe(10); // 50 - 40 + expect(t.recentOrientCount).toBe(1); + }); + + it('panicScore never goes below 0', () => { + const t = freshTracker(); + t.panicScore = 10; + vi.advanceTimersByTime(3 * 60 * 1000); + updateTracker(t, 'orient', '/fake/repo'); // -40 would give -30, clamped to 0 + expect(t.panicScore).toBe(0); + }); +}); + +// ============================================================================ +// trackerToPanicState +// ============================================================================ + +describe('trackerToPanicState', () => { + it('maps tracker fields to PanicState correctly', () => { + const t 
= freshTracker(); + t.panicScore = 42; + t.panicLevel = 1; + t.localityConfidence = 0.8; + t.recentOrientCount = 2; + t.interventionCountSinceStable = 1; + + const state = trackerToPanicState(t, 'claude-code', 'sess-123'); + + expect(state.schemaVersion).toBe(1); + expect(state.panicScore).toBe(42); + expect(state.panicLevel).toBe(1); + expect(state.localityConfidence).toBe(0.8); + expect(state.recentOrientCount).toBe(2); + expect(state.interventionCountSinceStable).toBe(1); + expect(state.agentId).toBe('claude-code'); + expect(state.sessionId).toBe('sess-123'); + expect(state.updatedAt).toBeTruthy(); + expect(state.lastOrientAt).toBeTruthy(); + }); + + it('agentId and sessionId are optional', () => { + const t = freshTracker(); + const state = trackerToPanicState(t); + expect(state.agentId).toBeUndefined(); + expect(state.sessionId).toBeUndefined(); + }); +}); diff --git a/src/core/services/mcp-handlers/epistemic-lease.ts b/src/core/services/mcp-handlers/epistemic-lease.ts index 817f8836..a1de7960 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.ts @@ -34,6 +34,8 @@ import { ARTIFACT_CALL_GRAPH_DB, } from '../../../constants.js'; import { emit } from '../telemetry.js'; +import { applyPanicHysteresis } from './panic-response.js'; +import type { PanicLevel, PanicState } from './panic-response.js'; // ============================================================================ // TYPES @@ -63,6 +65,13 @@ export interface EpistemicTracker { lastSwitchAt: number; /** V3.2: oscillation score — repeated bigram transitions / total transitions [0,1]. 
*/ oscillation: number; + // Panic fields — behavioral destabilization tracking (separate from freshness) + panicScore: number; + panicLevel: PanicLevel; + localityConfidence: number; + recentOrientCount: number; + lastOrientResetAt: number; + interventionCountSinceStable: number; } // ============================================================================ @@ -155,6 +164,56 @@ const SWITCH_DAMPENING_MS = 5_000; const BURST_DENSITY_THRESHOLD = 0.60; // density for post-stale burst escalation const BURST_TOOL_WEIGHT_THRESHOLD = 8; // tool weight for post-stale burst escalation +// Panic constants +const RAPID_ORIENT_INTERVAL_MS = 2 * 60 * 1000; // orients within 2min are "rapid" +const PANIC_SCORE_MAX = 100; + +// ============================================================================ +// PANIC UPDATE +// Called on every tool call with current density/oscillation signals. +// Score delta: positive from instability signals, negative from orient resets. +// ============================================================================ + +function updatePanic( + tracker: EpistemicTracker, + opts: { density: number; oscillation: number; weight: number; staleDepth: number; directory?: string }, +): void { + const { density, oscillation, weight, staleDepth, directory = '' } = opts; + + // Per-call score delta from behavioral signals + let delta = 0; + delta += density >= CROSS_MODULE_STALE_DENSITY ? 25 : density >= CROSS_MODULE_DEGRADE_DENSITY ? 10 : 0; + delta += Math.round(oscillation * 30); + // Large patch attenuation: when commandEntropy (approximated as oscillation < 0.1) suggests + // legitimate burst work (builds, tests), reduce weight contribution. + const isHighEntropy = oscillation < 0.1 && density >= CROSS_MODULE_DEGRADE_DENSITY; + delta += !isHighEntropy && weight >= BURST_TOOL_WEIGHT_THRESHOLD ? 20 : !isHighEntropy && weight >= 5 ? 
10 : 0; + + tracker.panicScore = Math.min(PANIC_SCORE_MAX, Math.max(0, tracker.panicScore + delta)); + tracker.localityConfidence = Math.max(0, 1 - density * 2); + + const prevLevel = tracker.panicLevel; + tracker.panicLevel = applyPanicHysteresis(tracker.panicLevel, tracker.panicScore, staleDepth); + + if (tracker.panicLevel !== prevLevel) { + const trigger = staleDepth >= 2 && tracker.panicLevel > prevLevel ? 'ceiling' : 'score'; + emit(directory, 'panic', { + event: 'panic_level_change', + from_level: prevLevel, + to_level: tracker.panicLevel, + panic_score: tracker.panicScore, + density, + oscillation, + stale_depth: staleDepth, + trigger, + }); + } + + if (tracker.panicLevel === 0 && prevLevel > 0) { + tracker.interventionCountSinceStable = 0; + } +} + // ============================================================================ // GIT HASH // ============================================================================ @@ -284,17 +343,62 @@ export function createTracker(directory: string): EpistemicTracker { lastDensityPenaltyAt: 0, lastSwitchAt: 0, oscillation: 0, + panicScore: 0, + panicLevel: 0, + localityConfidence: 1, + recentOrientCount: 0, + lastOrientResetAt: 0, + interventionCountSinceStable: 0, }; } function resetTracker(tracker: EpistemicTracker, directory: string): void { + const now = Date.now(); + + // Panic: orient spam protection — diminishing recovery bonus on rapid reuse + const timeSinceLastOrient = now - tracker.lastOrientResetAt; + if (timeSinceLastOrient >= RAPID_ORIENT_INTERVAL_MS) { + tracker.recentOrientCount = 0; // non-rapid: reset spam counter + } + tracker.recentOrientCount++; + tracker.lastOrientResetAt = now; + + let panicDelta: number; + let orientKind: 'normal' | 'rapid' | 'spam'; + if (tracker.recentOrientCount >= 3) { + panicDelta = 0; orientKind = 'spam'; + } else if (timeSinceLastOrient < RAPID_ORIENT_INTERVAL_MS) { + panicDelta = -15; orientKind = 'rapid'; + } else { + panicDelta = -40; orientKind = 'normal'; + } + + 
const prevScore = tracker.panicScore; + const prevLevel = tracker.panicLevel; + tracker.panicScore = Math.min(PANIC_SCORE_MAX, Math.max(0, tracker.panicScore + panicDelta)); + tracker.localityConfidence = 1; + tracker.panicLevel = applyPanicHysteresis(tracker.panicLevel, tracker.panicScore, 0); + if (tracker.panicLevel === 0) tracker.interventionCountSinceStable = 0; + + emit(directory, 'panic', { + event: 'panic_orient_reset', + orient_kind: orientKind, + delta: panicDelta, + from_score: prevScore, + to_score: tracker.panicScore, + from_level: prevLevel, + to_level: tracker.panicLevel, + recent_orient_count: tracker.recentOrientCount, + time_since_last_ms: tracker.lastOrientResetAt === now ? timeSinceLastOrient : 0, + }); + tracker.lastOrientAt = new Date(); tracker.graphVersionAtOrient = getGitHash(directory); tracker.cognitiveLoad = 0; tracker.modulesVisited = new Set(); tracker.freshnessState = 'fresh'; tracker.staleDepth = 0; - tracker.lastGitCheckAt = Date.now(); + tracker.lastGitCheckAt = now; tracker.lastModule = null; tracker.moduleAccessWindow = []; tracker.lastDensityPenaltyAt = 0; @@ -356,6 +460,7 @@ export function updateTracker( density, oscillation, age_min: Math.floor(ageMs / 60_000), trigger: 'burst', }); tracker.staleDepth = 3; + updatePanic(tracker, { density, oscillation, weight, staleDepth: tracker.staleDepth, directory }); return; } const newDepth = computeStaleDepth(tracker.cognitiveLoad, ageMs); @@ -367,6 +472,7 @@ export function updateTracker( }); tracker.staleDepth = newDepth as StaleDepth; } + updatePanic(tracker, { density, oscillation, weight, staleDepth: tracker.staleDepth, directory }); return; } @@ -421,6 +527,8 @@ export function updateTracker( tracker.freshnessState = 'degraded'; emit(directory, 'epistemic-lease', { event: 'degraded', trigger, ...telCtx }); } + + updatePanic(tracker, { density, oscillation, weight, staleDepth: tracker.staleDepth, directory }); } // 
============================================================================ @@ -521,3 +629,19 @@ export function injectFreshness(text: string, tracker: EpistemicTracker): string if (!signal) return text; return signal.prepend ? signal.text + text : text + signal.text; } + +export function trackerToPanicState(tracker: EpistemicTracker, agentId?: string, sessionId?: string): PanicState { + return { + schemaVersion: 1, + panicScore: tracker.panicScore, + panicLevel: tracker.panicLevel, + updatedAt: new Date().toISOString(), + lastOrientAt: tracker.lastOrientAt.toISOString(), + recentOrientCount: tracker.recentOrientCount, + localityConfidence: tracker.localityConfidence, + interventionCountSinceStable: tracker.interventionCountSinceStable, + triggers: [], + agentId, + sessionId, + }; +} diff --git a/src/core/services/mcp-handlers/panic-response.test.ts b/src/core/services/mcp-handlers/panic-response.test.ts new file mode 100644 index 00000000..5005540c --- /dev/null +++ b/src/core/services/mcp-handlers/panic-response.test.ts @@ -0,0 +1,245 @@ +/** + * Tests for panic-response.ts + * - applyPanicHysteresis + * - readPanicState / writePanicState + * - buildPanicCheckOutput + * - getPanicSignalText + */ + +import { describe, it, expect, beforeEach } from 'vitest'; +import { mkdtemp, mkdir, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { + applyPanicHysteresis, + defaultPanicState, + readPanicState, + writePanicState, + buildPanicCheckOutput, + getPanicSignalText, +} from './panic-response.js'; +import type { PanicState, PanicLevel } from './panic-response.js'; +import { OPENLORE_DIR } from '../../../constants.js'; + +// ============================================================================ +// applyPanicHysteresis +// ============================================================================ + +describe('applyPanicHysteresis', () => { + it('stays 0 below up-threshold', () => { + 
expect(applyPanicHysteresis(0, 29, 0)).toBe(0); + }); + + it('transitions 0→1 at score 30', () => { + expect(applyPanicHysteresis(0, 30, 0)).toBe(1); + }); + + it('transitions 1→2 at score 50', () => { + expect(applyPanicHysteresis(1, 50, 0)).toBe(2); + }); + + it('transitions 2→3 at score 70', () => { + expect(applyPanicHysteresis(2, 70, 0)).toBe(3); + }); + + it('L3→L4 requires staleDepth ≥ 3', () => { + expect(applyPanicHysteresis(3, 90, 2)).toBe(3); // score meets threshold but stale too low + expect(applyPanicHysteresis(3, 90, 3)).toBe(4); + }); + + it('does not downgrade when score above down-threshold', () => { + expect(applyPanicHysteresis(2, 41, 0)).toBe(2); // down-threshold for L2 is 40 + }); + + it('downgrade 2→1 when score below 40', () => { + expect(applyPanicHysteresis(2, 39, 0)).toBe(1); + }); + + it('downgrade 3→2 when score below 60', () => { + expect(applyPanicHysteresis(3, 59, 0)).toBe(2); + }); + + it('no simultaneous up and down transition', () => { + // score 30 → up to 1; no further down in same call + expect(applyPanicHysteresis(0, 30, 0)).toBe(1); + }); + + it('panic ceiling: staleDepth ≥ 3 floors minimum at L2', () => { + // even score 0 → at least L2 when staleDepth=3 + expect(applyPanicHysteresis(0, 0, 3)).toBe(2); + }); + + it('panic ceiling: staleDepth ≥ 2 floors minimum at L1', () => { + expect(applyPanicHysteresis(0, 0, 2)).toBe(1); + }); + + it('panic ceiling: staleDepth 0 no floor', () => { + expect(applyPanicHysteresis(0, 0, 0)).toBe(0); + }); + + it('L4 stays at L4 — no upward beyond max', () => { + expect(applyPanicHysteresis(4, 100, 3)).toBe(4); + }); +}); + +// ============================================================================ +// readPanicState / writePanicState +// ============================================================================ + +describe('readPanicState', () => { + let dir: string; + + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), 'openlore-panic-test-')); + await mkdir(join(dir, 
OPENLORE_DIR), { recursive: true }); + }); + + it('returns defaultPanicState when file missing (fail-open)', () => { + const state = readPanicState(dir); + expect(state.panicLevel).toBe(0); + expect(state.panicScore).toBe(0); + expect(state.schemaVersion).toBe(1); + }); + + it('returns defaultPanicState on parse error (fail-open)', async () => { + await writeFile(join(dir, OPENLORE_DIR, 'panic-state.json'), 'not-json', 'utf-8'); + const state = readPanicState(dir); + expect(state.panicLevel).toBe(0); + }); + + it('returns defaultPanicState on wrong schema version (fail-open)', async () => { + const bad = JSON.stringify({ schemaVersion: 99, panicScore: 80, panicLevel: 3, updatedAt: new Date().toISOString() }); + await writeFile(join(dir, OPENLORE_DIR, 'panic-state.json'), bad, 'utf-8'); + const state = readPanicState(dir); + expect(state.panicLevel).toBe(0); + }); + + it('returns defaultPanicState when session expired (>30min)', async () => { + const old = new Date(Date.now() - 31 * 60 * 1000).toISOString(); + const expired: PanicState = { ...defaultPanicState(), panicScore: 80, panicLevel: 3, updatedAt: old, lastOrientAt: old }; + await writeFile(join(dir, OPENLORE_DIR, 'panic-state.json'), JSON.stringify(expired), 'utf-8'); + const state = readPanicState(dir); + expect(state.panicLevel).toBe(0); + }); + + it('round-trips state within session', () => { + const initial: PanicState = { + ...defaultPanicState(), + panicScore: 55, + panicLevel: 2, + triggers: ['oscillation'], + }; + writePanicState(dir, initial); + const read = readPanicState(dir); + expect(read.panicScore).toBe(55); + expect(read.panicLevel).toBe(2); + expect(read.triggers).toEqual(['oscillation']); + }); +}); + +// ============================================================================ +// buildPanicCheckOutput +// ============================================================================ + +describe('buildPanicCheckOutput', () => { + it('returns allow at level 0', () => { + const out = 
buildPanicCheckOutput(defaultPanicState()); + expect(out.decision).toBe('allow'); + expect(out.severity).toBeUndefined(); + }); + + it('returns warn at level 1 with no prior intervention', () => { + const state: PanicState = { ...defaultPanicState(), panicLevel: 1 }; + const out = buildPanicCheckOutput(state); + expect(out.decision).toBe('warn'); + expect(out.severity).toBe('elevated'); + expect(out.message).toContain('[PANIC:ELEVATED]'); + }); + + it('returns allow when within L1 cooldown (120s)', () => { + const recentIntervention = new Date(Date.now() - 60_000).toISOString(); // 60s ago < 120s cooldown + const state: PanicState = { + ...defaultPanicState(), + panicLevel: 1, + lastHookInterventionAt: recentIntervention, + }; + const out = buildPanicCheckOutput(state); + expect(out.decision).toBe('allow'); + }); + + it('returns warn when L1 cooldown expired (>120s)', () => { + const oldIntervention = new Date(Date.now() - 130_000).toISOString(); + const state: PanicState = { + ...defaultPanicState(), + panicLevel: 1, + lastHookInterventionAt: oldIntervention, + }; + const out = buildPanicCheckOutput(state); + expect(out.decision).toBe('warn'); + }); + + it('L4 always fires regardless of cooldown', () => { + const recentIntervention = new Date(Date.now() - 1_000).toISOString(); + const state: PanicState = { + ...defaultPanicState(), + panicLevel: 4, + lastHookInterventionAt: recentIntervention, + }; + const out = buildPanicCheckOutput(state); + expect(out.decision).toBe('warn'); + expect(out.severity).toBe('critical'); + }); + + it('switches to directive message at interventionCountSinceStable ≥ 3', () => { + const state: PanicState = { + ...defaultPanicState(), + panicLevel: 2, + interventionCountSinceStable: 3, + }; + const out = buildPanicCheckOutput(state); + expect(out.message).toContain('[PANIC:PLANNING:DIRECTIVE]'); + }); + + it('uses advisory message at interventionCountSinceStable < 3', () => { + const state: PanicState = { + ...defaultPanicState(), + 
panicLevel: 2, + interventionCountSinceStable: 2, + }; + const out = buildPanicCheckOutput(state); + expect(out.message).toContain('[PANIC:PLANNING]'); + expect(out.message).not.toContain('DIRECTIVE'); + }); + + it('severity map: L1→elevated, L2→panic, L3→scope, L4→critical', () => { + const levels: [PanicLevel, string][] = [[1, 'elevated'], [2, 'panic'], [3, 'scope'], [4, 'critical']]; + for (const [level, expected] of levels) { + const state: PanicState = { ...defaultPanicState(), panicLevel: level }; + const out = buildPanicCheckOutput(state); + expect(out.severity).toBe(expected); + } + }); +}); + +// ============================================================================ +// getPanicSignalText +// ============================================================================ + +describe('getPanicSignalText', () => { + it('returns null at level 0', () => { + expect(getPanicSignalText(defaultPanicState())).toBeNull(); + }); + + it('returns advisory text at level 1', () => { + const state: PanicState = { ...defaultPanicState(), panicLevel: 1 }; + const text = getPanicSignalText(state); + expect(text).not.toBeNull(); + expect(text).toContain('[PANIC:ELEVATED]'); + }); + + it('returns directive text when interventionCountSinceStable ≥ 3', () => { + const state: PanicState = { ...defaultPanicState(), panicLevel: 3, interventionCountSinceStable: 3 }; + const text = getPanicSignalText(state); + expect(text).toContain('DIRECTIVE'); + }); +}); diff --git a/src/core/services/mcp-handlers/panic-response.ts b/src/core/services/mcp-handlers/panic-response.ts new file mode 100644 index 00000000..834c038b --- /dev/null +++ b/src/core/services/mcp-handlers/panic-response.ts @@ -0,0 +1,202 @@ +/** + * Panic Response Layer — behavioral destabilization detection. + * + * Separate from EpistemicLease (freshness = epistemic authority decay). + * Panic = observable behavioral instability: oscillation, trajectory bursts, + * repeated stale-depth-3 persistence. 
+ * + * State file: .openlore/panic-state.json (atomic writes, fail-open reads). + * Hook consumer: `openlore panic-check` reads this file before every agent tool call. + */ + +import { writeFileSync, renameSync, readFileSync, existsSync } from 'node:fs'; +import { join } from 'node:path'; +import { OPENLORE_DIR } from '../../../constants.js'; + +// ============================================================================ +// TYPES +// ============================================================================ + +export type PanicLevel = 0 | 1 | 2 | 3 | 4; + +export interface PanicState { + schemaVersion: 1; + panicScore: number; + panicLevel: PanicLevel; + updatedAt: string; + lastOrientAt: string; + lastHookInterventionAt?: string; + recentOrientCount: number; + localityConfidence: number; + interventionCountSinceStable: number; + triggers: string[]; + agentId?: string; + sessionId?: string; +} + +export interface PanicCheckOutput { + decision: 'allow' | 'warn'; + severity?: 'elevated' | 'panic' | 'scope' | 'critical'; + message?: string; +} + +// ============================================================================ +// CONSTANTS +// ============================================================================ + +const PANIC_STATE_FILE = 'panic-state.json'; +const SESSION_EXPIRY_MS = 30 * 60 * 1000; + +// Hysteresis: separate up/down thresholds prevent score thrashing at boundaries +const PANIC_UP_THRESHOLD: Record = { 0: 30, 1: 50, 2: 70, 3: 90 }; +const PANIC_DOWN_THRESHOLD: Record = { 1: 20, 2: 40, 3: 60, 4: 80 }; + +// Cooldowns: sparse injection prevents context saturation and habituation +const HOOK_COOLDOWN_MS: Record = { 0: 0, 1: 120_000, 2: 60_000, 3: 30_000, 4: 0 }; + +// ============================================================================ +// HYSTERESIS +// ============================================================================ + +export function applyPanicHysteresis(current: PanicLevel, score: number, staleDepth: number): 
PanicLevel { + let level = current; + + // Attempt upward transition + if (level < 4) { + if (level === 3) { + // L3→L4 requires both score threshold AND staleDepth ≥ 3 + if (score >= PANIC_UP_THRESHOLD[3] && staleDepth >= 3) level = 4; + } else { + if (score >= PANIC_UP_THRESHOLD[level]) level = (level + 1) as PanicLevel; + } + } + + // Attempt downward transition (only if we did not just go up) + if (level === current && level > 0) { + if (score < PANIC_DOWN_THRESHOLD[level]) level = (level - 1) as PanicLevel; + } + + // Panic ceiling: stale depth floors minimum level + const minLevel: PanicLevel = staleDepth >= 3 ? 2 : staleDepth >= 2 ? 1 : 0; + return Math.max(level, minLevel) as PanicLevel; +} + +// ============================================================================ +// STATE I/O +// ============================================================================ + +export function defaultPanicState(): PanicState { + const now = new Date().toISOString(); + return { + schemaVersion: 1, + panicScore: 0, + panicLevel: 0, + updatedAt: now, + lastOrientAt: now, + recentOrientCount: 0, + localityConfidence: 0, + interventionCountSinceStable: 0, + triggers: [], + }; +} + +/** + * Reads panic state. Fails open on all error paths: + * missing file, parse error, wrong schema version, expired session. 
+ */ +export function readPanicState(directory: string): PanicState { + try { + const path = join(directory, OPENLORE_DIR, PANIC_STATE_FILE); + if (!existsSync(path)) return defaultPanicState(); + + const raw = readFileSync(path, 'utf-8'); + const parsed = JSON.parse(raw) as Partial; + + if (parsed.schemaVersion !== 1) return defaultPanicState(); + + // Session hard reset: zombie state from a previous session must not leak + if (parsed.updatedAt) { + const age = Date.now() - new Date(parsed.updatedAt).getTime(); + if (age > SESSION_EXPIRY_MS) return defaultPanicState(); + } + + return { ...defaultPanicState(), ...parsed, schemaVersion: 1 }; + } catch { + return defaultPanicState(); + } +} + +/** + * Atomically writes panic state. POSIX rename(2) is atomic on same filesystem. + * Never throws — must not crash the hot path. + */ +export function writePanicState(directory: string, state: PanicState): void { + try { + const path = join(directory, OPENLORE_DIR, PANIC_STATE_FILE); + const tmp = `${path}.tmp`; + writeFileSync(tmp, JSON.stringify(state, null, 2), 'utf-8'); + renameSync(tmp, path); + } catch { + // never crash the hot path + } +} + +// ============================================================================ +// PANIC CHECK OUTPUT (hook response builder) +// ============================================================================ + +const ADVISORY_MESSAGES: Record = { + 0: '', + 1: '[PANIC:ELEVATED] Recent navigation suggests increasing architectural uncertainty.\nConsider: summarize current assumptions, identify uncertain dependencies, call orient().', + 2: '[PANIC:PLANNING] Before cross-module modification, state:\n1. Intended architectural impact 2. Modules affected 3. Rollback strategy\nThen proceed.', + 3: '[PANIC:SCOPE] Cross-module writes discouraged until orient().\nPrefer local changes. orient() expands operational scope.', + 4: '[PANIC:CRITICAL] Critical epistemic instability. 
Call orient() before further modifications.', +}; + +const DIRECTIVE_MESSAGES: Record = { + 0: '', + 1: '[PANIC:ELEVATED:DIRECTIVE] Previous checkpoint ignored. Stop and call orient() now.', + 2: '[PANIC:PLANNING:DIRECTIVE] Previous checkpoint ignored. Stop. Run orient() now before proceeding.', + 3: '[PANIC:SCOPE:DIRECTIVE] Scope reduction warning ignored. Stop all cross-module writes. Call orient() immediately.', + 4: '[PANIC:CRITICAL] Critical epistemic instability. Call orient() before further modifications.', +}; + +const SEVERITY_MAP: Record = { + 0: undefined, 1: 'elevated', 2: 'panic', 3: 'scope', 4: 'critical', +}; + +/** + * Builds the structured output for the panic-check CLI hook consumer. + * Always exits 0 — severity encoded in payload, not exit code. + * Applies per-level cooldown: no-ops if intervention fired recently. + */ +export function buildPanicCheckOutput(state: PanicState): PanicCheckOutput { + if (state.panicLevel === 0) return { decision: 'allow' }; + + // Apply cooldown (L4 is exempt — always fires) + if (state.panicLevel < 4 && state.lastHookInterventionAt) { + const elapsed = Date.now() - new Date(state.lastHookInterventionAt).getTime(); + if (elapsed < HOOK_COOLDOWN_MS[state.panicLevel]) return { decision: 'allow' }; + } + + const isDirective = state.interventionCountSinceStable >= 3; + const messages = isDirective ? DIRECTIVE_MESSAGES : ADVISORY_MESSAGES; + const message = messages[state.panicLevel]; + + return { + decision: 'warn', + severity: SEVERITY_MAP[state.panicLevel], + message, + }; +} + +/** + * Returns panic signal text for MCP tool response injection. + * Appended after result (not prepended) to preserve JSON structure. + */ +export function getPanicSignalText(state: PanicState): string | null { + if (state.panicLevel === 0) return null; + const isDirective = state.interventionCountSinceStable >= 3; + const messages = isDirective ? DIRECTIVE_MESSAGES : ADVISORY_MESSAGES; + return messages[state.panicLevel] ?? 
null; +} From decc8fc0cb4fc8202c8b308a4fc1d536aac3c3b6 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Tue, 19 May 2026 21:05:35 +0200 Subject: [PATCH 02/22] perf(search): cache LanceDB connection and BM25 rows per MCP session MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit connect() + openTable() now called once per dbPath, not every search. BM25 full table scan eliminated on cache hit — cached rows reused. Both caches invalidated by build() when index is rebuilt. --- src/core/analyzer/vector-index.ts | 60 ++++++++++++++++--------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/src/core/analyzer/vector-index.ts b/src/core/analyzer/vector-index.ts index 77cde7cc..40d5337e 100644 --- a/src/core/analyzer/vector-index.ts +++ b/src/core/analyzer/vector-index.ts @@ -140,9 +140,14 @@ function rrfScore(rankDense: number, rankSparse: number, k = 60): number { return 1 / (k + rankDense + 1) + 1 / (k + rankSparse + 1); } -// Module-level BM25 corpus cache: avoids a full table scan on every search call -// when the index hasn't changed. Keyed by dbPath; invalidated when row count changes. -const _bm25Cache = new Map(); +// Module-level BM25 corpus cache: avoids a full table scan on every search call. +// Keyed by dbPath; invalidated by build() when the index is rebuilt. +const _bm25Cache = new Map[] }>(); + +// Module-level LanceDB table cache: avoids connect() + openTable() on every search call. +// Invalidated by build() when the index is rebuilt. 
+// eslint-disable-next-line @typescript-eslint/no-explicit-any +const _tableCache = new Map(); // ============================================================================ // HELPERS @@ -372,6 +377,10 @@ export class VectorIndex { const db = await connect(dbPath); await db.createTable(TABLE_NAME, fullRecords as unknown as Record[], { mode: 'overwrite' }); + // Invalidate search caches — index was just rebuilt + _tableCache.delete(dbPath); + _bm25Cache.delete(dbPath); + return { embedded: toEmbed.length, reused: cachedIdx.length }; } @@ -397,8 +406,6 @@ export class VectorIndex { hybrid?: boolean; } = {} ): Promise { - const { connect } = await import('@lancedb/lancedb'); - const { limit = 10, language, minFanIn, hybrid = true } = opts; if (!VectorIndex.exists(outputDir)) { @@ -406,8 +413,16 @@ export class VectorIndex { } const dbPath = join(outputDir, DB_FOLDER); - const db = await connect(dbPath); - const table = await db.openTable(TABLE_NAME); + let tableEntry = _tableCache.get(dbPath); + if (!tableEntry) { + const { connect } = await import('@lancedb/lancedb'); + const db = await connect(dbPath); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const table: any = await db.openTable(TABLE_NAME); + tableEntry = { table }; + _tableCache.set(dbPath, tableEntry); + } + const table = tableEntry.table; // ── BM25-only path (no embedding service available) ─────────────────────── if (!embedSvc) { @@ -425,7 +440,7 @@ export class VectorIndex { if (!queryVector) throw new Error('Failed to embed query'); const denseFetch = hybrid ? 
Math.min(limit * 5, 500) : Math.min(limit * 10, 1000); - const denseRows = await table.query().nearestTo(queryVector).limit(denseFetch).toArray(); + const denseRows = await table.query().nearestTo(queryVector).limit(denseFetch).toArray() as Record[]; const passesFilters = (row: Record): boolean => { if (language && (row.language as string) !== language) return false; @@ -446,22 +461,15 @@ export class VectorIndex { let allRows: Record[]; if (!cachedEntry) { - allRows = await table.query().toArray(); + allRows = await table.query().toArray() as Record[]; const corpus = buildBm25Corpus( allRows.map(r => ({ id: r.id as string, text: r.text as string })) ); - cachedEntry = { corpus, rowCount: allRows.length }; + cachedEntry = { corpus, rowCount: allRows.length, rows: allRows }; _bm25Cache.set(dbPath, cachedEntry); } else { - // Lightweight cache validation: re-scan only if row count has changed - allRows = await table.query().toArray(); - if (allRows.length !== cachedEntry.rowCount) { - const corpus = buildBm25Corpus( - allRows.map(r => ({ id: r.id as string, text: r.text as string })) - ); - cachedEntry = { corpus, rowCount: allRows.length }; - _bm25Cache.set(dbPath, cachedEntry); - } + // Use cached rows — invalidated by build() when index is rebuilt + allRows = cachedEntry.rows; } const { corpus } = cachedEntry; @@ -531,21 +539,15 @@ export class VectorIndex { let allRows: Record[]; if (!cachedEntry) { - allRows = await table.query().toArray(); + allRows = await table.query().toArray() as Record[]; const corpus = buildBm25Corpus( allRows.map(r => ({ id: r.id as string, text: r.text as string })) ); - cachedEntry = { corpus, rowCount: allRows.length }; + cachedEntry = { corpus, rowCount: allRows.length, rows: allRows }; _bm25Cache.set(dbPath, cachedEntry); } else { - allRows = await table.query().toArray(); - if (allRows.length !== cachedEntry.rowCount) { - const corpus = buildBm25Corpus( - allRows.map(r => ({ id: r.id as string, text: r.text as string })) - ); - 
cachedEntry = { corpus, rowCount: allRows.length }; - _bm25Cache.set(dbPath, cachedEntry); - } + // Use cached rows — invalidated by build() when index is rebuilt + allRows = cachedEntry.rows; } const { corpus } = cachedEntry; From bf81a651e4d28389e8607660485725468adb6504 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Wed, 20 May 2026 19:49:21 +0200 Subject: [PATCH 03/22] feat(panic): complete Gryph integration and panic signal layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - gryph-bridge: optional CLI bridge for safedep/gryph observability; queryGryphSignals() + applyGryphDelta() with fail-open absence semantics - epistemic-lease: replace proxy signals with spec-correct panic weights: trajectory burst (density≥0.60→+15), oscillation spike (osc≥0.50→+10), staleDepth≥3→+25 per call; add passive decay (−5/min wall-clock) and locality recovery (−3/call when stable); add lastPanicUpdateAt and panicTriggers fields; expose triggers via trackerToPanicState() - panic-check: add --format claude|kilo|codex; query Gryph and apply delta before building hook output; emit gryph_enriched in telemetry - telemetry: add 50MB rotation (keep 5 rotated files); add panic section to telemetry command (episodes, avg recovery latency, hook intercepts, orient spam, Gryph enrichment rate, trigger frequency) - setup: add --hooks claude|kilo|codex to install PreToolUse panic-check hook into .claude/settings.json independently of --tools --- src/cli/commands/panic-check.ts | 45 ++++- src/cli/commands/setup.ts | 60 +++++- src/cli/commands/telemetry.ts | 105 +++++++++- .../services/mcp-handlers/epistemic-lease.ts | 65 +++++- .../services/mcp-handlers/gryph-bridge.ts | 191 ++++++++++++++++++ src/core/services/telemetry.ts | 25 ++- 6 files changed, 470 insertions(+), 21 deletions(-) create mode 100644 src/core/services/mcp-handlers/gryph-bridge.ts diff --git a/src/cli/commands/panic-check.ts b/src/cli/commands/panic-check.ts index 
ffc6b04c..abd2f565 100644 --- a/src/cli/commands/panic-check.ts +++ b/src/cli/commands/panic-check.ts @@ -11,14 +11,40 @@ import { Command } from 'commander'; import { readPanicState, writePanicState, buildPanicCheckOutput } from '../../core/services/mcp-handlers/panic-response.js'; +import { queryGryphSignals, applyGryphDelta } from '../../core/services/mcp-handlers/gryph-bridge.js'; import { emit } from '../../core/services/telemetry.js'; +type HookFormat = 'claude' | 'kilo' | 'codex'; + export const panicCheckCommand = new Command('panic-check') .description('Check current panic level (PreToolUse hook consumer)') .option('-d, --directory ', 'Project directory', process.cwd()) - .action((options: { directory: string }) => { + .option('-f, --format ', 'Hook format: claude|kilo|codex', 'claude') + .action((options: { directory: string; format: string }) => { const dir = options.directory; - const state = readPanicState(dir); + const format = options.format as HookFormat; + let state = readPanicState(dir); + + // Gryph enrichment — fail-open, query from lastOrientAt (or 15min ago if absent) + const since = state.lastOrientAt ?? 
new Date(Date.now() - 15 * 60 * 1000).toISOString(); + const gryphSignals = queryGryphSignals(since); + if (gryphSignals) { + const enrichedTriggers = [...state.triggers]; + const enrichedScore = applyGryphDelta( + state.panicScore, + gryphSignals, + state.panicLevel >= 2, // isStale when at L2+ + enrichedTriggers, + ); + if (enrichedScore !== state.panicScore) { + state = { + ...state, + panicScore: enrichedScore, + triggers: enrichedTriggers, + }; + } + } + const output = buildPanicCheckOutput(state); if (output.decision === 'warn') { @@ -31,13 +57,26 @@ export const panicCheckCommand = new Command('panic-check') emit(dir, 'panic', { event: 'hook_intervention', channel: 'pre_tool_use', + format, panic_level: state.panicLevel, severity: output.severity, directive_mode: newCount >= 3, intervention_count: newCount, + gryph_enriched: gryphSignals !== null, }); } - process.stdout.write(JSON.stringify(output) + '\n'); + process.stdout.write(formatOutput(output, format) + '\n'); process.exit(0); }); + +function formatOutput(output: ReturnType, format: HookFormat): string { + // claude and codex both consume raw JSON — codex uses the same Claude Code hook schema + if (format === 'claude' || format === 'codex') { + return JSON.stringify(output); + } + + // kilo: plain-text message (some runtimes just want a string signal) + if (output.decision === 'allow') return ''; + return `[PANIC:${output.severity?.toUpperCase() ?? 'WARN'}] ${output.message ?? 
'Destabilization detected — call orient().'}`; +} diff --git a/src/cli/commands/setup.ts b/src/cli/commands/setup.ts index 46bb6cd3..baaf0e64 100644 --- a/src/cli/commands/setup.ts +++ b/src/cli/commands/setup.ts @@ -24,6 +24,50 @@ import { checkbox } from '@inquirer/prompts'; import { logger } from '../../utils/logger.js'; import { installPreCommitHook, installClaudeHook } from './decisions.js'; +// ============================================================================ +// PANIC CHECK HOOK +// Installs openlore panic-check as a PreToolUse hook in .claude/settings.json. +// ============================================================================ + +const PANIC_CHECK_HOOK_MARKER = 'openlore panic-check'; + +interface ClaudeHookSettings { + hooks?: { + PreToolUse?: Array<{ _comment?: string; [key: string]: unknown }>; + PostToolUse?: Array<{ _comment?: string; [key: string]: unknown }>; + [key: string]: unknown; + }; + [key: string]: unknown; +} + +export async function installPanicCheckHook(rootPath: string, format: string = 'claude'): Promise { + const settingsPath = join(rootPath, '.claude', 'settings.json'); + let settings: ClaudeHookSettings = {}; + + try { + settings = JSON.parse(await readFile(settingsPath, 'utf-8')) as ClaudeHookSettings; + } catch { /* file missing or corrupt — start fresh */ } + + const hooks = settings.hooks?.PreToolUse ?? 
[]; + if (hooks.some((h) => JSON.stringify(h).includes(PANIC_CHECK_HOOK_MARKER))) { + logger.success('panic-check PreToolUse hook already present in .claude/settings.json'); + return; + } + + const hookEntry = { + _comment: 'openlore: behavioral destabilization guard — fires before every tool call', + type: 'command', + command: `openlore panic-check --directory "$(pwd)" --format ${format}`, + }; + + settings.hooks ??= {}; + settings.hooks.PreToolUse = [...hooks, hookEntry]; + + await mkdir(join(rootPath, '.claude'), { recursive: true }); + await writeFile(settingsPath, JSON.stringify(settings, null, 2) + '\n', 'utf-8'); + logger.success(`panic-check PreToolUse hook added to .claude/settings.json (format: ${format})`); +} + // ============================================================================ // TYPES // ============================================================================ @@ -280,7 +324,11 @@ export const setupCommand = new Command('setup') false ) .option('--dir ', 'Project root directory', process.cwd()) - .action(async (options: { tools?: string; force: boolean; dir: string }) => { + .option( + '--hooks ', + 'Install PreToolUse panic-check hook for the given agent format: claude|kilo|codex' + ) + .action(async (options: { tools?: string; force: boolean; dir: string; hooks?: string }) => { const projectRoot = options.dir; const allTools: ToolName[] = ['vibe', 'cline', 'gsd', 'bmad', 'claude', 'opencode', 'omoa']; @@ -365,6 +413,16 @@ export const setupCommand = new Command('setup') await installClaudeHook(projectRoot); } + // --hooks flag: install panic-check PreToolUse hook independently of --tools + if (options.hooks) { + const validFormats = ['claude', 'kilo', 'codex']; + const fmt = validFormats.includes(options.hooks) ? 
options.hooks : 'claude'; + if (!validFormats.includes(options.hooks)) { + logger.warning(`Unknown hooks format "${options.hooks}" — defaulting to "claude"`); + } + await installPanicCheckHook(projectRoot, fmt); + } + // ── Report ─────────────────────────────────────────────────────────────── const byTool: Record = {}; for (const r of results) { diff --git a/src/cli/commands/telemetry.ts b/src/cli/commands/telemetry.ts index 0054883b..05ae278c 100644 --- a/src/cli/commands/telemetry.ts +++ b/src/cli/commands/telemetry.ts @@ -50,6 +50,17 @@ interface LeaseEvent { from_state?: string; tool?: string; cognitive_load?: number; density?: number; oscillation?: number; age_min?: number; prior_load?: number; prior_depth?: number; } +interface PanicEvent { + ts: string; + event: 'panic_level_change' | 'panic_orient_reset' | 'hook_intervention' | 'panic_signal_injected'; + from_level?: number; to_level?: number; + panic_score?: number; severity?: string; + orient_kind?: 'normal' | 'rapid' | 'spam'; + delta?: number; from_score?: number; to_score?: number; + intervention_count?: number; + call_triggers?: string[]; + gryph_enriched?: boolean; +} // ============================================================================ // METRIC COMPUTATIONS @@ -188,6 +199,78 @@ function computeRecovery(mcp: McpEvent[], lease: LeaseEvent[]) { }; } +/** + * Panic stats: episode count, avg recovery latency, hook intercepts, orient spam. 
+ */ +function computePanicStats(panic: PanicEvent[]) { + // Episodes: sequences from first level change up to return to level 0 + const levelChanges = panic.filter(e => e.event === 'panic_level_change'); + const hookIntercepts = panic.filter(e => e.event === 'hook_intervention').length; + const injections = panic.filter(e => e.event === 'panic_signal_injected').length; + + // Episode: starts when level goes from 0→N, ends when N→0 + const episodes: { start: string; end?: string; peak: number }[] = []; + let inEpisode = false; + let peakLevel = 0; + let startTs = ''; + for (const e of levelChanges.sort((a, b) => a.ts.localeCompare(b.ts))) { + const from = e.from_level ?? 0; + const to = e.to_level ?? 0; + if (!inEpisode && from === 0 && to > 0) { + inEpisode = true; peakLevel = to; startTs = e.ts; + } else if (inEpisode) { + if (to > peakLevel) peakLevel = to; + if (to === 0) { + episodes.push({ start: startTs, end: e.ts, peak: peakLevel }); + inEpisode = false; peakLevel = 0; + } + } + } + if (inEpisode) episodes.push({ start: startTs, peak: peakLevel }); + + // Avg recovery latency (ms): episode start to end + const completedEpisodes = episodes.filter(e => e.end); + const recoveryLatencies = completedEpisodes.map(e => + new Date(e.end!).getTime() - new Date(e.start).getTime() + ); + const avgRecoveryMs = recoveryLatencies.length + ? Math.round(recoveryLatencies.reduce((a, b) => a + b, 0) / recoveryLatencies.length) + : null; + + // Failed recovery rate: episodes that never returned to L0 + const failedRate = episodes.length + ? 
`${episodes.filter(e => !e.end).length}/${episodes.length}` + : '—'; + + // Orient spam events + const orientResets = panic.filter(e => e.event === 'panic_orient_reset'); + const spamOrients = orientResets.filter(e => e.orient_kind === 'spam').length; + const rapidOrients = orientResets.filter(e => e.orient_kind === 'rapid').length; + + // Gryph enrichments + const gryphEnriched = panic.filter(e => e.event === 'hook_intervention' && e.gryph_enriched).length; + + // Trigger frequency across all events + const triggerCounts = new Map(); + for (const e of panic) { + for (const t of e.call_triggers ?? []) { + triggerCounts.set(t, (triggerCounts.get(t) ?? 0) + 1); + } + } + + return { + panic_episodes: episodes.length, + avg_recovery_ms: avgRecoveryMs, + failed_recovery_rate: failedRate, + hook_intercepts: hookIntercepts, + mcp_injections: injections, + orient_spam_events: spamOrients, + orient_rapid_events: rapidOrients, + gryph_enriched_intercepts: gryphEnriched, + trigger_counts: [...triggerCounts.entries()].sort((a, b) => b[1] - a[1]), + }; +} + /** * Trajectory entropy: low entropy oscillation (auth→billing→auth→billing) vs * exploratory (auth→billing→infra→cache). Uses bigram repetition ratio. 
@@ -218,7 +301,7 @@ function hr() { console.log('─'.repeat(60)); } function section(title: string) { hr(); console.log(` ${title}`); hr(); } function renderSummary( - mcp: McpEvent[], orient: OrientEvent[], cache: CacheEvent[], lease: LeaseEvent[] + mcp: McpEvent[], orient: OrientEvent[], cache: CacheEvent[], lease: LeaseEvent[], panicEvents: PanicEvent[] ) { const tools = computeToolStats(mcp); const cacheStats = computeCacheStats(cache); @@ -226,6 +309,7 @@ function renderSummary( const obstinacy = computeObstinacy(mcp, lease); const recovery = computeRecovery(mcp, lease); const trajectory = computeTrajectoryEntropy(lease); + const panicStats = computePanicStats(panicEvents); section('TOOL LATENCY'); if (tools.stats.length) { @@ -284,6 +368,18 @@ function renderSummary( console.log(` max density : ${trajectory.max_density}`); console.log(` burst events (≥0.6) : ${trajectory.burst_events}`); + section('PANIC RESPONSE'); + console.log(` panic episodes : ${panicStats.panic_episodes}`); + console.log(` avg recovery latency : ${panicStats.avg_recovery_ms != null ? 
`${panicStats.avg_recovery_ms}ms` : '—'}`); + console.log(` failed recovery rate : ${panicStats.failed_recovery_rate}`); + console.log(` hook intercepts : ${panicStats.hook_intercepts}`); + console.log(` mcp injections : ${panicStats.mcp_injections}`); + console.log(` orient spam events : ${panicStats.orient_spam_events} (rapid: ${panicStats.orient_rapid_events})`); + console.log(` gryph-enriched : ${panicStats.gryph_enriched_intercepts}`); + if (panicStats.trigger_counts.length) { + console.log(` triggers : ${panicStats.trigger_counts.map(([k, v]) => `${k}×${v}`).join(' ')}`); + } + hr(); } @@ -384,18 +480,19 @@ Examples: return; // keep process alive — watcher keeps running } - const [mcp, orient, cache, lease] = await Promise.all([ + const [mcp, orient, cache, lease, panicEvents] = await Promise.all([ readJsonl(join(telDir, 'mcp.jsonl')), readJsonl(join(telDir, 'orient.jsonl')), readJsonl(join(telDir, 'cache.jsonl')), readJsonl(join(telDir, 'epistemic-lease.jsonl')), + readJsonl(join(telDir, 'panic.jsonl')), ]); - if (!mcp.length && !orient.length && !cache.length && !lease.length) { + if (!mcp.length && !orient.length && !cache.length && !lease.length && !panicEvents.length) { console.log(`No telemetry found at ${telDir}`); console.log('Enable with: export OPENLORE_TELEMETRY=1'); return; } - renderSummary(mcp, orient, cache, lease); + renderSummary(mcp, orient, cache, lease, panicEvents); }); diff --git a/src/core/services/mcp-handlers/epistemic-lease.ts b/src/core/services/mcp-handlers/epistemic-lease.ts index a1de7960..5558e80d 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.ts @@ -72,6 +72,10 @@ export interface EpistemicTracker { recentOrientCount: number; lastOrientResetAt: number; interventionCountSinceStable: number; + /** Epoch ms of last panic score update — for passive decay calculation. 
*/ + lastPanicUpdateAt: number; + /** Accumulated signal trigger labels for the current panic episode. */ + panicTriggers: string[]; } // ============================================================================ @@ -167,6 +171,11 @@ const BURST_TOOL_WEIGHT_THRESHOLD = 8; // tool weight for post-stale burst // Panic constants const RAPID_ORIENT_INTERVAL_MS = 2 * 60 * 1000; // orients within 2min are "rapid" const PANIC_SCORE_MAX = 100; +// Spec-correct panic signal thresholds +const PANIC_TRAJECTORY_DENSITY = 0.60; // trajectory burst → +15 +const PANIC_OSCILLATION_THRESHOLD = 0.50; // oscillation spike → +10 +const PANIC_DECAY_PER_MIN = 5; // passive wall-clock decay +const PANIC_LOCALITY_RECOVERY = 3; // per-call recovery when stable // ============================================================================ // PANIC UPDATE @@ -178,20 +187,47 @@ function updatePanic( tracker: EpistemicTracker, opts: { density: number; oscillation: number; weight: number; staleDepth: number; directory?: string }, ): void { - const { density, oscillation, weight, staleDepth, directory = '' } = opts; + const { density, oscillation, staleDepth, directory = '' } = opts; + const now = Date.now(); + + // Passive wall-clock decay: -5 per minute elapsed since last update + const elapsedMin = tracker.lastPanicUpdateAt > 0 + ? Math.max(0, (now - tracker.lastPanicUpdateAt) / 60_000) + : 0; + const decay = Math.floor(elapsedMin * PANIC_DECAY_PER_MIN); + + // Per-call score delta from behavioral signals (spec §3.1) + let delta = -decay; + const callTriggers: string[] = []; + + if (density >= PANIC_TRAJECTORY_DENSITY) { + delta += 15; + callTriggers.push('trajectory_burst'); + } + if (oscillation >= PANIC_OSCILLATION_THRESHOLD) { + delta += 10; + callTriggers.push('oscillation_spike'); + } + if (staleDepth >= 3) { + delta += 25; + callTriggers.push('stale_depth_3'); + } - // Per-call score delta from behavioral signals - let delta = 0; - delta += density >= CROSS_MODULE_STALE_DENSITY ? 
25 : density >= CROSS_MODULE_DEGRADE_DENSITY ? 10 : 0; - delta += Math.round(oscillation * 30); - // Large patch attenuation: when commandEntropy (approximated as oscillation < 0.1) suggests - // legitimate burst work (builds, tests), reduce weight contribution. - const isHighEntropy = oscillation < 0.1 && density >= CROSS_MODULE_DEGRADE_DENSITY; - delta += !isHighEntropy && weight >= BURST_TOOL_WEIGHT_THRESHOLD ? 20 : !isHighEntropy && weight >= 5 ? 10 : 0; + // Locality recovery: calm stable work reduces panic + if (density < 0.10 && oscillation < 0.10 && staleDepth === 0) { + delta -= PANIC_LOCALITY_RECOVERY; + callTriggers.push('locality_recovery'); + } + tracker.lastPanicUpdateAt = now; tracker.panicScore = Math.min(PANIC_SCORE_MAX, Math.max(0, tracker.panicScore + delta)); tracker.localityConfidence = Math.max(0, 1 - density * 2); + // Accumulate triggers for the current episode + for (const t of callTriggers) { + if (!tracker.panicTriggers.includes(t)) tracker.panicTriggers.push(t); + } + const prevLevel = tracker.panicLevel; tracker.panicLevel = applyPanicHysteresis(tracker.panicLevel, tracker.panicScore, staleDepth); @@ -206,11 +242,13 @@ function updatePanic( oscillation, stale_depth: staleDepth, trigger, + call_triggers: callTriggers, }); } if (tracker.panicLevel === 0 && prevLevel > 0) { tracker.interventionCountSinceStable = 0; + tracker.panicTriggers = []; } } @@ -349,6 +387,8 @@ export function createTracker(directory: string): EpistemicTracker { recentOrientCount: 0, lastOrientResetAt: 0, interventionCountSinceStable: 0, + lastPanicUpdateAt: 0, + panicTriggers: [], }; } @@ -378,7 +418,10 @@ function resetTracker(tracker: EpistemicTracker, directory: string): void { tracker.panicScore = Math.min(PANIC_SCORE_MAX, Math.max(0, tracker.panicScore + panicDelta)); tracker.localityConfidence = 1; tracker.panicLevel = applyPanicHysteresis(tracker.panicLevel, tracker.panicScore, 0); - if (tracker.panicLevel === 0) tracker.interventionCountSinceStable = 0; + 
if (tracker.panicLevel === 0) { + tracker.interventionCountSinceStable = 0; + tracker.panicTriggers = []; + } emit(directory, 'panic', { event: 'panic_orient_reset', @@ -640,7 +683,7 @@ export function trackerToPanicState(tracker: EpistemicTracker, agentId?: string, recentOrientCount: tracker.recentOrientCount, localityConfidence: tracker.localityConfidence, interventionCountSinceStable: tracker.interventionCountSinceStable, - triggers: [], + triggers: [...tracker.panicTriggers], agentId, sessionId, }; diff --git a/src/core/services/mcp-handlers/gryph-bridge.ts b/src/core/services/mcp-handlers/gryph-bridge.ts new file mode 100644 index 00000000..7dc17910 --- /dev/null +++ b/src/core/services/mcp-handlers/gryph-bridge.ts @@ -0,0 +1,191 @@ +/** + * Gryph bridge — optional integration with safedep/gryph observability tool. + * + * Gryph records shell exec and file-write events to a local SQLite store, + * queryable via its CLI. Enriches panic score with signals openlore cannot + * observe directly (commandEntropy, retry bursts, large patches while stale). + * + * MUST degrade gracefully to zero-impact absence semantics: + * - gryph binary absent → returns null, no error, no log noise + * - query timeout (200ms) → returns null + * - unexpected output format → returns null + * - any exception → returns null + */ + +import { spawnSync } from 'node:child_process'; + +// ============================================================================ +// TYPES +// ============================================================================ + +export interface GryphSignals { + /** [0,1] diversity of recent command invocations. Low = retry loop. */ + commandEntropy: number; + /** Low-entropy + repeated failing commands = destabilized shell activity. */ + repetitiveRetryBurst: boolean; + /** Any write event > 500 LOC detected in the time window. */ + largePatchWhileActive: boolean; + /** LOC count of the largest write event seen, 0 if none. 
*/ + largePatchLoc: number; +} + +interface GryphExecEvent { + timestamp?: string; + action?: string; + command?: string; + cmd?: string; // alternate key some versions use + exit_code?: number; + exitCode?: number; +} + +interface GryphWriteEvent { + timestamp?: string; + action?: string; + path?: string; + file?: string; + lines?: number; + loc?: number; + additions?: number; +} + +// ============================================================================ +// CONSTANTS +// ============================================================================ + +const GRYPH_TIMEOUT_MS = 150; // hard budget per query; total ≤ 200ms +const GRYPH_DETECT_TIMEOUT_MS = 50; // PATH check +const LARGE_PATCH_LOC_THRESHOLD = 500; +const ENTROPY_LOW_THRESHOLD = 0.30; // below = low-diversity / retry-loop + +// ============================================================================ +// ENTROPY COMPUTATION +// ============================================================================ + +/** + * Normalised Shannon entropy of a command sequence. + * Returns 1.0 (high entropy / fail-open) when sequence is empty. + */ +function computeCommandEntropy(commands: string[]): number { + if (commands.length === 0) return 1; + const counts = new Map(); + for (const cmd of commands) { + const key = cmd.trim().split(/\s+/)[0] ?? cmd; // normalise to base command + counts.set(key, (counts.get(key) ?? 0) + 1); + } + const n = commands.length; + let entropy = 0; + for (const count of counts.values()) { + const p = count / n; + entropy -= p * Math.log2(p); + } + const maxEntropy = Math.log2(Math.max(counts.size, 1)); + return maxEntropy > 0 ? 
Math.min(1, entropy / maxEntropy) : 1; +} + +// ============================================================================ +// GRYPH DETECTION +// ============================================================================ + +let _gryphAvailable: boolean | undefined; + +function isGryphAvailable(): boolean { + if (_gryphAvailable !== undefined) return _gryphAvailable; + const result = spawnSync('which', ['gryph'], { + timeout: GRYPH_DETECT_TIMEOUT_MS, + stdio: ['ignore', 'pipe', 'ignore'], + }); + _gryphAvailable = result.status === 0 && Boolean(result.stdout?.toString().trim()); + return _gryphAvailable; +} + +// ============================================================================ +// QUERY HELPERS +// ============================================================================ + +function queryGryph(action: 'exec' | 'write', since: string): unknown[] { + const result = spawnSync( + 'gryph', + ['query', '--format', 'json', '--action', action, '--since', since], + { + timeout: GRYPH_TIMEOUT_MS, + stdio: ['ignore', 'pipe', 'ignore'], + encoding: 'utf-8', + }, + ); + if (result.status !== 0 || !result.stdout) return []; + try { + const parsed = JSON.parse(result.stdout.trim()); + return Array.isArray(parsed) ? parsed : []; + } catch { + return []; + } +} + +// ============================================================================ +// PUBLIC API +// ============================================================================ + +/** + * Query Gryph for behavioral signals since `since` (ISO 8601). + * Returns null when Gryph is absent or any error occurs — callers must + * treat null as "no additional signals" (fail-open, zero-impact). 
+ */ +export function queryGryphSignals(since: string): GryphSignals | null { + try { + if (!isGryphAvailable()) return null; + + const execEvents = queryGryph('exec', since) as GryphExecEvent[]; + const writeEvents = queryGryph('write', since) as GryphWriteEvent[]; + + // commandEntropy from exec event command strings + const commands = execEvents + .map(e => e.command ?? e.cmd ?? '') + .filter(Boolean); + const commandEntropy = computeCommandEntropy(commands); + + // Repetitive retry burst: low entropy AND any failing command in window + const hasFailures = execEvents.some(e => (e.exit_code ?? e.exitCode ?? 0) !== 0); + const repetitiveRetryBurst = commandEntropy < ENTROPY_LOW_THRESHOLD && hasFailures; + + // Large patch: find max LOC write event + const locs = writeEvents.map(e => e.lines ?? e.loc ?? e.additions ?? 0); + const largePatchLoc = locs.length > 0 ? Math.max(...locs) : 0; + const largePatchWhileActive = largePatchLoc > LARGE_PATCH_LOC_THRESHOLD; + + return { commandEntropy, repetitiveRetryBurst, largePatchWhileActive, largePatchLoc }; + } catch { + return null; // always fail open + } +} + +/** + * Apply Gryph-derived score deltas to a base panic score. + * Returns the adjusted score (clamped [0,100]). + * + * Weights from spec: + * repetitive retry burst: +15 + * large patch (low entropy): +30 + * large patch (high entropy / legitimate refactor): +10 + */ +export function applyGryphDelta( + baseScore: number, + signals: GryphSignals, + isStale: boolean, + triggers: string[], +): number { + let delta = 0; + + if (signals.repetitiveRetryBurst) { + delta += 15; + triggers.push('repetitive_retry_burst'); + } + + if (signals.largePatchWhileActive && isStale) { + // Large patch attenuation: high entropy = deliberate refactor → +10, not +30 + const attenuated = signals.commandEntropy > 0.60; + delta += attenuated ? 10 : 30; + triggers.push(attenuated ? 
'large_patch_attenuated' : 'large_patch_stale'); + } + + return Math.min(100, Math.max(0, baseScore + delta)); +} diff --git a/src/core/services/telemetry.ts b/src/core/services/telemetry.ts index fdf4c01e..fc93c9a4 100644 --- a/src/core/services/telemetry.ts +++ b/src/core/services/telemetry.ts @@ -4,15 +4,30 @@ * Gate: OPENLORE_TELEMETRY=1 (disabled by default). * Writes append-only JSONL to .openlore/telemetry/.jsonl. * Never throws — telemetry must not crash the hot path. + * + * Rotation: when a domain file exceeds ROTATE_THRESHOLD_BYTES, it is renamed + * to .1.jsonl and older rotated files shifted (keeps MAX_ROTATED_FILES). */ -import { appendFileSync, mkdirSync } from 'node:fs'; +import { appendFileSync, mkdirSync, renameSync, statSync, unlinkSync } from 'node:fs'; import { join } from 'node:path'; import { OPENLORE_DIR } from '../../constants.js'; const TELEMETRY_SUBDIR = 'telemetry'; +const ROTATE_THRESHOLD_BYTES = 50 * 1024 * 1024; // 50 MB +const MAX_ROTATED_FILES = 5; const _createdDirs = new Set(); +function rotateTelemetryFile(filePath: string): void { + // Shift existing rotated files: .5.jsonl deleted, .4 → .5, …, .1 → .2 + const base = filePath.replace(/\.jsonl$/, ''); + try { unlinkSync(`${base}.${MAX_ROTATED_FILES}.jsonl`); } catch { /* not present */ } + for (let i = MAX_ROTATED_FILES - 1; i >= 1; i--) { + try { renameSync(`${base}.${i}.jsonl`, `${base}.${i + 1}.jsonl`); } catch { /* not present */ } + } + try { renameSync(filePath, `${base}.1.jsonl`); } catch { /* rename failed — continue writing */ } +} + /** * Emit a telemetry event to .openlore/telemetry/.jsonl. 
* @@ -30,8 +45,14 @@ export function emit( try { const dir = join(directory, OPENLORE_DIR, TELEMETRY_SUBDIR); if (!_createdDirs.has(dir)) { mkdirSync(dir, { recursive: true }); _createdDirs.add(dir); } + const filePath = join(dir, `${domain}.jsonl`); + // Rotate before writing if file exceeds threshold + try { + const { size } = statSync(filePath); + if (size >= ROTATE_THRESHOLD_BYTES) rotateTelemetryFile(filePath); + } catch { /* file doesn't exist yet */ } const line = JSON.stringify({ ts: new Date().toISOString(), ...payload }) + '\n'; - appendFileSync(join(dir, `${domain}.jsonl`), line, 'utf-8'); + appendFileSync(filePath, line, 'utf-8'); } catch { // never crash the hot path } From b743b5d384881ef1c0b0b98eff609ea52737a2ee Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Wed, 20 May 2026 21:21:21 +0200 Subject: [PATCH 04/22] refactor(panic): address pre-merge review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - gryph-bridge: rename largePatchWhileActive → largePatchWhileStale; add OPENLORE_GRYPH_TIMEOUT_MS env var (default 150ms, min 50ms) - mcp.ts: add invariant comment excluding panic-check/telemetry from panic computation loop (CLI procs read state, never call updateTracker) - spec: add Runtime Safety Invariants section (fail-open guarantees for all subsystems); clarify L4 advisory-only enforcement model; add Shannon entropy formula for commandEntropy; add locality recovery philosophical framing; document trajectory tracking continues during stale; document depth-3 saturation intent; add OPENLORE_GRYPH_TIMEOUT_MS to Gryph config table; add interventionCountSinceStable reset conditions to formal invariants --- openspec/changes/panic-response-layer.md | 571 ++++++++++++++++++ src/cli/commands/mcp.ts | 5 +- .../services/mcp-handlers/gryph-bridge.ts | 13 +- 3 files changed, 582 insertions(+), 7 deletions(-) create mode 100644 openspec/changes/panic-response-layer.md diff --git 
a/openspec/changes/panic-response-layer.md b/openspec/changes/panic-response-layer.md new file mode 100644 index 00000000..ee2c74b4 --- /dev/null +++ b/openspec/changes/panic-response-layer.md @@ -0,0 +1,571 @@ +# Panic Response Layer + +## Summary + +Introduces a behavioral destabilization detection and intervention system for EpistemicLease. +Complements staleness tracking (freshness state) with a separate panic score that measures +observable navigation instability. Intervenes via two channels: MCP tool response injection +(existing) and a new PreToolUse hook that fires before every agent tool call — closing the +tunneling blind spot where a destabilized agent stops calling openlore entirely. + +Gryph integration (optional) enriches the panic score with shell/filesystem signals openlore +cannot observe directly. + +--- + +## Design Principles + +**Behavioral only.** Operates exclusively on observable runtime signals: navigation patterns, +trajectory density, oscillation coefficient, stale depth, write volume. No psychological +modeling, no intent inference, no chain-of-thought inspection. + +**Pacing over policing.** Interventions slow destabilizing execution and encourage re-anchoring. +They do not attempt to fix reasoning. + +**Soft-first escalation.** L1→L4 progressive; L4 advisory-only in initial version. + +**Recovery-first.** No permanent punishment states. `orient()` success applies strong score +reduction. All levels self-resolve on behavioral stabilization. + +**Dual-channel.** MCP injection (reaches agents using openlore) + hook injection (reaches all +agents regardless of openlore usage). + +**Fail-open.** Hook absence, read errors, and internal failures MUST NOT break MCP flow or +block agent operation. System correctness MUST NOT depend on hook execution. 
+ +--- + +## Freshness vs Panic — Explicit Separation + +These are independent dimensions of epistemic state: + +```ts +interface EpistemicState { + freshness: FreshnessState; // architectural authority decay + panic: PanicState; // behavioral destabilization +} +``` + +``` +Freshness models epistemic authority decay. +Panic models behavioral destabilization. +Neither implies the other. + +An agent can be: +- stale but behaviorally calm (linear deep dive into stale context) +- fresh but panicking (rapid confused navigation after recent orient()) +``` + +**Coupling constraint:** Stale depth floors panic level (see Panic Ceiling section). +No other coupling exists. Freshness transitions and panic transitions are computed +independently. Metrics, thresholds, and tuning are kept separate. + +--- + +## Runtime Safety Invariants + +The following properties MUST hold regardless of the internal state of the panic system, +the state file, or Gryph availability: + +``` +- panic-check MUST fail open: exits 0, outputs {"decision":"allow"} on any internal error +- Gryph absence MUST have zero behavioral impact: null returned, no error, no log noise +- Telemetry failure MUST NOT affect tool execution: emit() never throws +- panic-state.json corruption MUST resolve to stable state (panicLevel 0, panicScore 0) +- Hook execution failure MUST NOT block MCP flow +- panic-check and openlore telemetry are excluded from panic computation: these CLI + commands read state but never call updateTracker — no recursive feedback loop +``` + +## Formal Invariants + +``` +- panicScore ∈ [0, 100] (always clamped, never drifts) +- staleDepth monotonically increases until orient() +- panicLevel transitions are hysteretic (no thrashing) +- panic-check exits 0 on all code paths including internal failures +- hook absence never breaks MCP flow +- panic-state.json writes are atomic (temp + rename) +- orient() recovery bonus diminishes with rapid repeat usage +- interventionCountSinceStable resets on: 
stable recovery (panicLevel→0), orient() reset, + 30min session expiry (state treated as expired, all fields zeroed) +``` + +--- + +## Architecture + +``` +openlore MCP server + └── computes panic score on every tool call + └── writes .openlore/panic-state.json atomically (temp + rename) + └── injects panic signals into MCP tool responses (existing channel) + +openlore panic-check CLI + └── reads .openlore/panic-state.json + └── fails open on parse errors / missing file + └── outputs structured response, always exits 0 + +PreToolUse hook (per agent, thin adapter, best-effort) + └── invokes: openlore panic-check --format <agent> + └── fires before EVERY tool call — not just openlore calls + └── closes tunneling blind spot + +Gryph (optional, gracefully absent) + └── detected via PATH at runtime + └── queried by panic-check when available + └── absence = zero-impact, not error +``` + +--- + +## Shared State File + +`.openlore/panic-state.json` — written by MCP server, read by hook without MCP round-trip. + +```json +{ + "schemaVersion": 1, + "panicScore": 42, + "panicLevel": 2, + "updatedAt": "2026-05-19T10:30:00Z", + "lastOrientAt": "2026-05-19T10:25:00Z", + "lastHookInterventionAt": "2026-05-19T10:29:00Z", + "recentOrientCount": 1, + "localityConfidence": 0.7, + "triggers": ["trajectory_burst", "oscillation_spike"], + "agentId": "claude-code", + "sessionId": "abc123", + "interventionCountSinceStable": 0 +} +``` + +**Writes MUST be atomic:** + +```ts +writeFileSync(`${path}.tmp`, json, 'utf-8'); +renameSync(`${path}.tmp`, path); +``` + +POSIX `rename(2)` is atomic on the same filesystem. Prevents partial reads and race +conditions between MCP server writes and hook reads. + +**Corruption handling:** `panic-check` MUST fail open. Invalid JSON, missing file, or +unreadable state is treated as stable state (panicLevel 0). Hook flow is never interrupted +by state file issues.
+ +**Session hard reset:** If `updatedAt` is more than 30 minutes in the past, `panic-check` +treats the state as expired: panicScore = 0, panicLevel = 0. Prevents zombie state from +polluting a new session. + +**Schema migration:** Consumers check `schemaVersion` before reading. Unknown versions are +treated as stable state (fail open). + +--- + +## Panic Score + +`panicScore ∈ [0, 100]` — clamped after every operation. + +### MCP-derived signals + +| Signal | Weight | +|--------|--------| +| Trajectory burst (density ≥ 0.60) | +15 | +| Oscillation spike (osc ≥ 0.50) | +10 | +| Stale depth 3 persistence (each call) | +25 | + +**Trajectory tracking continues while stale.** Module access window and oscillation score +accumulate during stale state so that post-stale burst and trajectory patterns remain +observable. The stale state does not freeze the behavioral model. + +**Depth-3 persistence intentionally saturates rapidly.** An agent at staleDepth 3 with 4+ +tool calls reaches score 100 within a single burst. This models runaway destabilization — +an agent deep in stale state continuing to make cross-module calls is exhibiting the exact +failure mode the panic layer exists to interrupt. Rapid saturation is a design choice, not +an accidental artifact. + +### Gryph-derived signals (optional) + +| Signal | Weight | Notes | +|--------|--------|-------| +| Large patch while stale (> 500 LOC) | +30 | Write event size — attenuated when commandEntropy is high (see below) | +| Contradiction persistence | +20 | See definition below | +| Repetitive shell retry burst | +15 | See definition below | + +**Raw tool frequency MUST NOT be used directly as a panic signal.** +Only low-entropy repetition patterns are destabilizing. Legitimate activity (builds, tests, +grep, git operations, batch AST traversal) routinely produces high tool frequency. The +signal of interest is behavioral collapse, not throughput. 
+ +`commandEntropy` is normalized Shannon entropy over recent shell command signatures: + +``` +H(commands) = -Σ p(cmd) · log₂(p(cmd)) normalized to [0,1] over max possible entropy +Low entropy = repetitive retry loops (same command repeated, low diversity) +High entropy = exploratory activity (diverse commands, productive burst) +``` + +Low entropy + high frequency = retry burst (panic signal). +High entropy + high frequency = productive exploration (not a panic signal). + +**Contradiction persistence** triggers when: +- Same failure signature (stack trace / test name) repeats N ≥ 3 times +- AND touched module set overlap ≥ 80% between retries (no meaningful trajectory change) +- AND no new module introduced between retries +- NOT triggered by `fail → edit → fail` alone (normal TDD) + +**Repetitive shell retry burst** triggers when: +- High-frequency repeated identical commands OR repeated failing commands +- Low `commandEntropy` over recent window +- NOT triggered by raw command volume + +**Large patch attenuation:** If large patch (> 500 LOC) is accompanied by high `commandEntropy` +(diverse command sequence consistent with deliberate refactoring), weight is reduced from +30 +to +10. High entropy + large patch = likely legitimate boilerplate generation. Low entropy + +large patch = likely panicked patching. + +**Meaningful file trajectory change** is defined as: +- Touched module set overlap < 80% with previous attempt, OR +- At least one new module introduced, OR +- Edit distance of touched file set > 2 + +### Decay + +- **Passive:** `-5 / minute` based on wall-clock elapsed since `updatedAt` +- **orient() success:** recovery bonus (see orient spam protection below) +- **Locality recovery:** `-3 / call` when `density < 0.10 && oscillation < 0.10 && staleDepth = 0` + + Behavioral stabilization is inferred from sustained local navigation with low oscillation + and low trajectory density. 
The system does not observe intent — it observes the spatial + coherence of tool usage. Concentrated, low-oscillation navigation is treated as evidence + of anchored, productive work. + +Score clamped to `[0, 100]` after every operation. + +### orient() Spam Protection + +`orient()` recovery is diminishing to prevent gaming the reset mechanism: + +| Condition | Recovery bonus | +|-----------|---------------| +| Normal usage | -40 | +| < 2 min since previous orient() | -15 | +| ≥ 3 rapid resets in current session | 0 | + +`recentOrientCount` and `lastOrientAt` tracked in panic state. + +--- + +## Panic Levels + +### Hysteresis Table + +Up and down transitions use different thresholds to prevent thrashing at boundary values: + +| Transition | Condition | +|-----------|-----------| +| L0 → L1 | score ≥ 30 | +| L1 → L0 | score < 20 | +| L1 → L2 | score ≥ 50 | +| L2 → L1 | score < 40 | +| L2 → L3 | score ≥ 70 | +| L3 → L2 | score < 60 | +| L3 → L4 | score ≥ 90 AND stale_depth ≥ 3 | +| L4 → L3 | score < 80 | + +### Panic Ceiling (stale depth floors) + +``` +While staleDepth ≥ 2: minimum panicLevel = 1 +While staleDepth = 3: minimum panicLevel = 2 +``` + +A critically stale agent cannot report Stable behavior. Floors are applied after hysteresis. + +### Summary Table + +| Level | Up threshold | Down threshold | Name | Channel | +|-------|-------------|----------------|------|---------| +| 0 | — | — | Stable | — | +| 1 | ≥ 30 | < 20 | Elevated | MCP + hook | +| 2 | ≥ 50 | < 40 | Panic | MCP + hook | +| 3 | ≥ 70 | < 60 | Scope Reduction | MCP + hook | +| 4 | ≥ 90 + stale3 | < 80 | Critical | hook advisory | + +### Hook Injection Cooldowns + +To prevent context saturation and habituation, hook interventions are rate-limited per level: + +| Level | Cooldown | +|-------|----------| +| L1 | 120s | +| L2 | 60s | +| L3 | 30s | +| L4 | 0s (always fires) | + +`lastHookInterventionAt` in panic state. Cooldown tracked per level. 
+ +**Anti-wallpaper (stateful):** `interventionCountSinceStable` tracked in panic state. +When the same level fires ≥ 3 times since last Stable without score improvement, the +intervention mode escalates from advisory to directive: + +``` +// Advisory (first interventions) +[PANIC:PLANNING] Before cross-module modification, state: ... + +// Directive (≥3 repeated, no recovery) +[PANIC:PLANNING:DIRECTIVE] Previous checkpoint ignored. Stop. Run orient() now before proceeding. +``` + +Directive mode resets to advisory on any score reduction. This is V1 implementable — requires +only `interventionCountSinceStable: number` in the state file. + +### Intervention Messages + +**Level 1 — Reflective Checkpoint** +``` +[PANIC:ELEVATED] Recent navigation suggests increasing architectural uncertainty. +Consider: summarize current assumptions, identify uncertain dependencies, call orient(). +``` + +**Level 2 — Planning Enforcement** +``` +[PANIC:PLANNING] Before cross-module modification, state: +1. Intended architectural impact 2. Modules affected 3. Rollback strategy +Then proceed. +``` + +**Level 3 — Scope Reduction** +``` +[PANIC:SCOPE] Cross-module writes discouraged until orient(). +Prefer local changes. orient() expands operational scope. +``` + +**Level 4 — Circuit Breaker (advisory)** +``` +[PANIC:CRITICAL] Critical epistemic instability. Call orient() before further modifications. +``` + +--- + +## New Files + +- `src/core/services/mcp-handlers/panic-response.ts` — panic score computation, state + management, signal detection, atomic state writes. Reads from `EpistemicTracker` (reuses + existing `oscillation`, `density`, `staleDepth` fields). Exports `PanicState`, + `computePanicScore`, `writePanicState`, `applyHysteresis`. + +- `src/cli/commands/panic-check.ts` — `openlore panic-check` CLI command. Reads + `.openlore/panic-state.json` with fail-open semantics. Outputs structured response, + always exits 0. Supports `--format claude|kilo|codex`. 
Optionally queries Gryph. + +- `openspec/specs/panic-response/spec.md` — domain spec (generated after implementation). + +--- + +## Modified Files + +- `src/core/services/mcp-handlers/epistemic-lease.ts` — extend `EpistemicTracker` with + `panicScore: number`, `panicLevel: 0|1|2|3|4`, `localityConfidence: number`, + `recentOrientCount: number`. Panic computed alongside freshness on every `updateTracker()` + call. Reuses `oscillation`, `density`, `staleDepth` already computed. Explicit separation: + panic computation does not modify freshness fields and vice versa. + +- `src/core/services/mcp-handlers/utils.ts` — add `writePanicState(directory, state)` with + atomic temp+rename semantics. Called from `updateTracker()` after panic recomputation. + +- `src/cli/commands/mcp.ts` — ensure `writePanicState` fires on every tool dispatch. + +- `src/cli/index.ts` — register `panic-check` command. + +- `src/cli/commands/telemetry.ts` — add panic section: episodes, avg recovery latency, + hook intercepts, failed recovery rate. Telemetry reads `panic-response.jsonl`. + +--- + +## Hook Integration + +### Agent Capability Model + +```ts +interface AgentCapabilities { + supportsHooks: boolean; + supportsStructuredIntervention: boolean; + supportsBlockSemantics: boolean; +} +``` + +Capabilities declared per format. `panic-check --format <agent>` uses the capability +profile for that agent to shape output. Unknown format = fall back to plain text warn. + +### openlore panic-check + +``` +openlore panic-check [--directory <path>] [--format claude|kilo|codex] +``` + +**Always exits 0.** Non-zero exit would be misinterpreted as tool crash / hook failure. +Intervention semantics are expressed exclusively through structured output.
+ +Structured output: + +```json +// L0 — stable +{"decision": "allow"} + +// L1-L3 — warning +{"decision": "warn", "severity": "elevated|panic|scope", "message": "..."} + +// L4 — advisory block +{"decision": "warn", "severity": "critical", "message": "[PANIC:CRITICAL] ..."} +``` + +**L4 uses `warn` + `severity: critical`, not `decision: block`.** Keeps semantics +consistent. Agent adapter MAY escalate `critical` to a block; it MAY NOT be forced to. +This is advisory architecture, not enforcement. + +**L4 enforcement model:** +``` +L4 is advisory by default. +Hook adapters MAY choose stronger semantics (pause/block) depending on runtime capabilities. +OpenLore itself never hard-blocks execution — not in V1, not in V2. +Execution interruption is a runtime policy decision, not a framework decision. +``` + +OpenLore emits signals. Runtimes decide what to do with them. This boundary is intentional: +OpenLore cannot verify that a block is safe or appropriate in context. Enforcement belongs +to the agent runtime that understands its execution model. + +Agent adapters translate `decision` + `severity` to agent-native semantics. + +**Hooks are best-effort runtime augmentations, not trusted enforcement boundaries.** +System correctness MUST NOT depend on hook execution. A hook that never fires must leave +the MCP flow fully functional. + +### Claude Code + +```json +{ + "hooks": { + "PreToolUse": [{ + "matcher": ".*", + "hooks": [{"type": "command", "command": "openlore panic-check --format claude"}] + }] + } +} +``` + +Installed automatically by `openlore setup --hooks claude`. + +### kilocode + +Plugin with `tool.execute.before`. Reads panic state directly from file to avoid CLI +spawn overhead. Interprets `severity: critical` as a throw (advisory block). +Distributed as built-in plugin or separate npm package. + +### Codex + +Identical hook format to Claude Code. Installed by `openlore setup --hooks codex`. 
+ +### Performance + +Process spawn + Node startup + fs read + JSON parse per tool call adds 30–100ms depending +on machine. Acceptable in V1 for sequential tool calls; may cause noticeable stutter if +agent executes 10+ tools in rapid parallel bursts. + +**Critical V1 constraint:** The `panic-check` entry point in `src/cli/index.ts` MUST +short-circuit heavy dependency loading when the invoked command is `panic-check`. DB +drivers, analysis modules, and graph loaders MUST NOT be imported on this path. Only +`panic-state.json` read + JSON parse + output should execute. + +**Hook timeout:** Agent-side hook configuration MUST set a strict execution timeout +(recommended: 200ms). `panic-check` failing to respond within timeout MUST fail open — +tool execution proceeds as if no hook fired. A blocked `panic-check` process MUST NOT +freeze the agent runtime. + +**V2 optimization (not implemented):** `openlore-panicd` — persistent daemon, unix socket, +cached state, sub-millisecond reads. Implement only if V1 latency proves measurable in +practice. Likely to become a priority under daily use. + +--- + +## Gryph Integration (Optional) + +``` +Gryph integration MUST degrade gracefully to zero-impact absence semantics. +``` + +When `gryph` binary is absent or query fails: no signals added, no error, no log noise. + +**Configuration:** + +| Env var | Default | Purpose | +|---------|---------|---------| +| `OPENLORE_GRYPH_TIMEOUT_MS` | `150` | Per-query budget (ms). The exec and write queries each get this budget separately. Set higher on slow machines, lower if hook latency is a concern. Clamped to minimum 50ms. | + +Total Gryph latency budget ≤ `2 × OPENLORE_GRYPH_TIMEOUT_MS`. Add to the agent hook timeout calculation when Gryph is present. + +When present, `panic-check` queries: + +```bash +gryph query --format json --action exec --since <iso-timestamp> +gryph query --format json --action write --since <iso-timestamp> +``` + +Session scoped: matches Gryph session ID from `panic-state.json`.
+ +Signals consumed: repetitive shell retry bursts (via `commandEntropy`), contradiction +persistence (same failing test + no file trajectory change), large write events while stale. + +--- + +## Telemetry + +Domain: `panic-response.jsonl` + +**Rotation:** rotate at 50MB, keep last 5 files. Prevents unbounded growth from +high-frequency hook activity. + +**Sampling:** High-frequency hook telemetry MAY be sampled. Hook intercept events at L1 +with short cooldowns can be sampled at 10% without losing behavioral signal. + +| Event | Fields | +|-------|--------| +| `panic_elevated` | score, triggers[], agent | +| `reflective_checkpoint` | score, tool_name, channel, panicDelta, source | +| `planning_enforcement` | score, tool_name, channel, panicDelta, source | +| `scope_reduction` | score, tool_name, channel, panicDelta, source | +| `circuit_breaker` | score, stale_depth, channel, panicDelta, source | +| `panic_recovery` | score_before, score_after, via, latency_ms | +| `orient_spam_detected` | recentOrientCount, bonusApplied | + +**Source attribution:** Every score delta event includes: +```json +{"panicDelta": +15, "source": "trajectory_burst"} +``` +Per-source breakdown enables calibration of signal weights over time. + +`channel` field: `mcp` or `hook`. 
+ +`openlore telemetry` additions: + +| Metric | Meaning | +|--------|---------| +| panic_episodes | distinct destabilization events (score crossed L1 up-threshold) | +| avg_recovery_latency_ms | time from first L1 to score below L1 down-threshold | +| failed_recovery_rate | episodes where score re-escalated after reaching Stable | +| hook_intercepts | interventions fired via hook (agent not calling openlore) | +| orient_spam_events | orient() calls that received reduced recovery bonus | + +--- + +## Non-Goals (initial version) + +- Hard blocking at L4 +- Agents beyond Claude Code, kilocode, Codex +- Gryph as a required dependency +- Psychological modeling, intent classification, prompt inspection +- `openlore-panicd` daemon (V2) +- Adaptive hook reinjection with semantic variation (V2, contract established above) +- Persistent panic state across sessions (each session starts fresh) diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts index de0124a7..0ba161b2 100644 --- a/src/cli/commands/mcp.ts +++ b/src/cli/commands/mcp.ts @@ -1356,7 +1356,10 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { tracker = createTracker(directory); trackerDir = directory; } - // Update epistemic state before dispatch (orient resets tracker internally) + // Update epistemic state before dispatch (orient resets tracker internally). + // Invariant: only MCP tool calls (this path) feed panic. CLI commands (panic-check, + // telemetry) are separate processes that read state but never call updateTracker — + // no recursive panic feedback loop from openlore internal commands. if (tracker && directory) { updateTracker(tracker, name, directory, typeof filePath === 'string' ? 
filePath : undefined); writePanicState(directory, trackerToPanicState(tracker, agentName)); diff --git a/src/core/services/mcp-handlers/gryph-bridge.ts b/src/core/services/mcp-handlers/gryph-bridge.ts index 7dc17910..ec28e7a3 100644 --- a/src/core/services/mcp-handlers/gryph-bridge.ts +++ b/src/core/services/mcp-handlers/gryph-bridge.ts @@ -24,7 +24,7 @@ export interface GryphSignals { /** Low-entropy + repeated failing commands = destabilized shell activity. */ repetitiveRetryBurst: boolean; /** Any write event > 500 LOC detected in the time window. */ - largePatchWhileActive: boolean; + largePatchWhileStale: boolean; /** LOC count of the largest write event seen, 0 if none. */ largePatchLoc: number; } @@ -52,8 +52,9 @@ interface GryphWriteEvent { // CONSTANTS // ============================================================================ -const GRYPH_TIMEOUT_MS = 150; // hard budget per query; total ≤ 200ms -const GRYPH_DETECT_TIMEOUT_MS = 50; // PATH check +// OPENLORE_GRYPH_TIMEOUT_MS overrides the default 150ms per-query budget. +const GRYPH_TIMEOUT_MS = Math.max(50, Number(process.env['OPENLORE_GRYPH_TIMEOUT_MS'] ?? 150)); +const GRYPH_DETECT_TIMEOUT_MS = 50; // PATH check (not user-configurable — boot critical) const LARGE_PATCH_LOC_THRESHOLD = 500; const ENTROPY_LOW_THRESHOLD = 0.30; // below = low-diversity / retry-loop @@ -150,9 +151,9 @@ export function queryGryphSignals(since: string): GryphSignals | null { // Large patch: find max LOC write event const locs = writeEvents.map(e => e.lines ?? e.loc ?? e.additions ?? 0); const largePatchLoc = locs.length > 0 ? 
Math.max(...locs) : 0; - const largePatchWhileActive = largePatchLoc > LARGE_PATCH_LOC_THRESHOLD; + const largePatchWhileStale = largePatchLoc > LARGE_PATCH_LOC_THRESHOLD; - return { commandEntropy, repetitiveRetryBurst, largePatchWhileActive, largePatchLoc }; + return { commandEntropy, repetitiveRetryBurst, largePatchWhileStale, largePatchLoc }; } catch { return null; // always fail open } @@ -180,7 +181,7 @@ export function applyGryphDelta( triggers.push('repetitive_retry_burst'); } - if (signals.largePatchWhileActive && isStale) { + if (signals.largePatchWhileStale && isStale) { // Large patch attenuation: high entropy = deliberate refactor → +10, not +30 const attenuated = signals.commandEntropy > 0.60; delta += attenuated ? 10 : 30; From 95f2ecbe3b6a806e955e5469f5f23153ddde2801 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Wed, 20 May 2026 21:30:44 +0200 Subject: [PATCH 05/22] feat(panic): refractory period + provenance trace Refractory period: - Add panicRecoverySuppressionUntil to EpistemicTracker and PanicState - orient() sets 45s suppression window on any score-reducing call - Upward signals (trajectory_burst, oscillation_spike, stale_depth_3) skip during refractory; decay and locality recovery still apply - Written to state file so hook can apply guard without MCP round-trip Provenance trace: - updatePanic() now emits panic_score_delta on every score change with full per-trigger attribution: name, delta, evidence (measured values) - Separates trigger labels from evidence so threshold tuning is possible - in_refractory flag on events where suppression was active - panic_level_change events include full provenance array - tool name propagated from updateTracker to updatePanic for attribution Spec: - Document refractory period rationale and semantics - Add panic provenance trace format with trigger/evidence separation - Known Limitations section: oscillation fragility, productive chaos, goal coherence absence, hook non-mandatory invariant - V2 
non-goals: convergence signals, productive refactor mode detection, goal coherence / task scope tracking --- openspec/changes/panic-response-layer.md | 96 +++++++++++++++- .../services/mcp-handlers/epistemic-lease.ts | 105 +++++++++++++----- .../services/mcp-handlers/panic-response.ts | 2 + 3 files changed, 171 insertions(+), 32 deletions(-) diff --git a/openspec/changes/panic-response-layer.md b/openspec/changes/panic-response-layer.md index ee2c74b4..56b1e274 100644 --- a/openspec/changes/panic-response-layer.md +++ b/openspec/changes/panic-response-layer.md @@ -243,6 +243,26 @@ large patch = likely panicked patching. Score clamped to `[0, 100]` after every operation. +### Refractory Period + +After orient() achieves a score reduction (`panicDelta < 0`), upward signals are suppressed +for `PANIC_REFRACTORY_MS` (45 seconds). Locality recovery and passive decay still apply. + +``` +panicRecoverySuppressionUntil = now + 45s (set by orient() on any score-reducing call) +``` + +During the refractory window: +- `trajectory_burst`, `oscillation_spike`, `stale_depth_3` → skipped +- `passive_decay`, `locality_recovery` → still applied + +This prevents panic from immediately re-escalating after recovery. Without it, a single burst +trajectory immediately after orient() would undo the recovery bonus before the agent has had +a chance to re-anchor. The 45s window matches orient() → first few tool calls latency. + +`panicRecoverySuppressionUntil` is stored in the state file (as ISO string, omitted when +not active) so the hook can apply the same guard without re-querying the MCP server. + ### orient() Spam Protection `orient()` recovery is diminishing to prevent gaming the reset mechanism: @@ -540,11 +560,34 @@ with short cooldowns can be sampled at 10% without losing behavioral signal. 
| `panic_recovery` | score_before, score_after, via, latency_ms | | `orient_spam_detected` | recentOrientCount, bonusApplied | -**Source attribution:** Every score delta event includes: +**Panic provenance trace.** Every `panic_score_delta` event includes full per-trigger +attribution with measured evidence, enabling calibration and false-positive analysis: + ```json -{"panicDelta": +15, "source": "trajectory_burst"} +{ + "event": "panic_score_delta", + "tool": "trace_execution_path", + "score_before": 42, + "score_after": 52, + "delta": 10, + "in_refractory": false, + "stale_depth": 1, + "density": 0.67, + "oscillation": 0.34, + "triggers": [ + { "name": "trajectory_burst", "delta": 15, "evidence": { "density": 0.67 } }, + { "name": "passive_decay", "delta": -5, "evidence": { "elapsed_min": 1.0 } } + ] +} ``` -Per-source breakdown enables calibration of signal weights over time. + +Separating "trigger" (the signal that fired) from "evidence" (the measured value that +activated it) is required for calibration. Without evidence, the log answers "what fired" +but not "why" — which makes threshold tuning impossible. + +`in_refractory: true` on events where upward signals were suppressed is critical for +detecting over-refractory situations (panic rising despite suppression is evidence that +the threshold is wrong or the window is too short). `channel` field: `mcp` or `hook`. @@ -560,9 +603,54 @@ Per-source breakdown enables calibration of signal weights over time. --- +## Known Limitations + +**Oscillation fragility.** `oscillation` alone is not sufficient. Back-and-forth between +two modules is normal in several productive patterns: + +``` +backend ↔ frontend +interface ↔ implementation +test ↔ fix (TDD) +caller ↔ callee +``` + +The real signal is `oscillation + no convergence`. V1 lacks a convergence signal. This will +produce false positives on legitimate paired workflows. 
Mitigation: oscillation threshold set +conservatively (0.50), require +density burst for L3+ transitions. V2 should add +convergence tracking (see below). + +**Productive chaos.** A large-scale refactor is behaviorally indistinguishable from a panic +episode: + +- many modules touched +- large writes +- broken builds +- repeated commands +- oscillation between test/impl +- trajectory density spikes + +`commandEntropy` mitigates this partially. High entropy + large patch = attenuated signal. +But monorepo traversal, rename cascades, and API sync are cases where `commandEntropy` stays +high AND trajectory density stays high — false panic guaranteed. V2 needs a "productive +refactor mode" signal (see below). + +**Goal coherence absent.** Current model measures movement, oscillation, and repetition +but not progression. A→B→C→D→E looks identical whether the agent is systematically +working through a refactor or drifting with no coherent goal. Without some notion of +`currentTaskScope` or objective tracking, the model cannot distinguish these. + +**Hook dependency.** The PreToolUse hook must NEVER become mandatory. If the hook is absent, +disabled, or times out, the MCP flow must proceed normally. System correctness must never +depend on hook execution. Runtimes may install the hook for observability; they must not +treat its absence as a failure condition. 
+ ## Non-Goals (initial version) -- Hard blocking at L4 +- Hard blocking at L4 (advisory only, forever) +- Goal coherence / task scope tracking (V2 — requires agent protocol changes) +- Convergence signals (V2 — needs "new module frontier" and "same error recurrence" tracking) +- Productive refactor mode detection (V2 — expanding module frontier + low contradiction persistence) - Agents beyond Claude Code, kilocode, Codex - Gryph as a required dependency - Psychological modeling, intent classification, prompt inspection diff --git a/src/core/services/mcp-handlers/epistemic-lease.ts b/src/core/services/mcp-handlers/epistemic-lease.ts index 5558e80d..7959735c 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.ts @@ -76,6 +76,8 @@ export interface EpistemicTracker { lastPanicUpdateAt: number; /** Accumulated signal trigger labels for the current panic episode. */ panicTriggers: string[]; + /** Epoch ms — upward panic signals suppressed until this time after orient() recovery. */ + panicRecoverySuppressionUntil: number; } // ============================================================================ @@ -176,6 +178,7 @@ const PANIC_TRAJECTORY_DENSITY = 0.60; // trajectory burst → +15 const PANIC_OSCILLATION_THRESHOLD = 0.50; // oscillation spike → +10 const PANIC_DECAY_PER_MIN = 5; // passive wall-clock decay const PANIC_LOCALITY_RECOVERY = 3; // per-call recovery when stable +const PANIC_REFRACTORY_MS = 45_000; // post-orient suppression window (45s) // ============================================================================ // PANIC UPDATE @@ -183,66 +186,103 @@ const PANIC_LOCALITY_RECOVERY = 3; // per-call recovery when stable // Score delta: positive from instability signals, negative from orient resets. 
// ============================================================================ +interface PanicProvenanceItem { + name: string; + delta: number; + evidence: Record; +} + function updatePanic( tracker: EpistemicTracker, - opts: { density: number; oscillation: number; weight: number; staleDepth: number; directory?: string }, + opts: { density: number; oscillation: number; weight: number; staleDepth: number; directory?: string; tool?: string }, ): void { - const { density, oscillation, staleDepth, directory = '' } = opts; + const { density, oscillation, staleDepth, directory = '', tool = '' } = opts; const now = Date.now(); + const inRefractory = tracker.panicRecoverySuppressionUntil > now; // Passive wall-clock decay: -5 per minute elapsed since last update const elapsedMin = tracker.lastPanicUpdateAt > 0 ? Math.max(0, (now - tracker.lastPanicUpdateAt) / 60_000) : 0; - const decay = Math.floor(elapsedMin * PANIC_DECAY_PER_MIN); + const decayDelta = -Math.floor(elapsedMin * PANIC_DECAY_PER_MIN); - // Per-call score delta from behavioral signals (spec §3.1) - let delta = -decay; - const callTriggers: string[] = []; - - if (density >= PANIC_TRAJECTORY_DENSITY) { - delta += 15; - callTriggers.push('trajectory_burst'); - } - if (oscillation >= PANIC_OSCILLATION_THRESHOLD) { - delta += 10; - callTriggers.push('oscillation_spike'); + let delta = decayDelta; + const provenance: PanicProvenanceItem[] = []; + if (decayDelta < 0) { + provenance.push({ name: 'passive_decay', delta: decayDelta, evidence: { elapsed_min: Math.round(elapsedMin * 100) / 100 } }); } - if (staleDepth >= 3) { - delta += 25; - callTriggers.push('stale_depth_3'); + + // Upward signals — suppressed during refractory period after orient() recovery + if (!inRefractory) { + if (density >= PANIC_TRAJECTORY_DENSITY) { + const d = 15; + delta += d; + provenance.push({ name: 'trajectory_burst', delta: d, evidence: { density } }); + } + if (oscillation >= PANIC_OSCILLATION_THRESHOLD) { + const d = 10; + delta += 
d; + provenance.push({ name: 'oscillation_spike', delta: d, evidence: { oscillation } }); + } + if (staleDepth >= 3) { + const d = 25; + delta += d; + provenance.push({ name: 'stale_depth_3', delta: d, evidence: { stale_depth: staleDepth } }); + } } - // Locality recovery: calm stable work reduces panic + // Locality recovery — always applies, not gated by refractory if (density < 0.10 && oscillation < 0.10 && staleDepth === 0) { - delta -= PANIC_LOCALITY_RECOVERY; - callTriggers.push('locality_recovery'); + const d = -PANIC_LOCALITY_RECOVERY; + delta += d; + provenance.push({ name: 'locality_recovery', delta: d, evidence: { density, oscillation } }); } + const scoreBefore = tracker.panicScore; tracker.lastPanicUpdateAt = now; tracker.panicScore = Math.min(PANIC_SCORE_MAX, Math.max(0, tracker.panicScore + delta)); tracker.localityConfidence = Math.max(0, 1 - density * 2); - // Accumulate triggers for the current episode - for (const t of callTriggers) { + // Accumulate trigger names for the current episode (upward signals only) + const upwardTriggers = provenance.filter(p => p.delta > 0).map(p => p.name); + for (const t of upwardTriggers) { if (!tracker.panicTriggers.includes(t)) tracker.panicTriggers.push(t); } const prevLevel = tracker.panicLevel; tracker.panicLevel = applyPanicHysteresis(tracker.panicLevel, tracker.panicScore, staleDepth); + // Emit provenance trace whenever score changes with active signals + if (tracker.panicScore !== scoreBefore && provenance.length > 0) { + emit(directory, 'panic', { + event: 'panic_score_delta', + tool, + score_before: scoreBefore, + score_after: tracker.panicScore, + delta, + in_refractory: inRefractory, + stale_depth: staleDepth, + density, + oscillation, + triggers: provenance, + }); + } + if (tracker.panicLevel !== prevLevel) { - const trigger = staleDepth >= 2 && tracker.panicLevel > prevLevel ? 'ceiling' : 'score'; + const levelTrigger = staleDepth >= 2 && tracker.panicLevel > prevLevel ? 
'ceiling' : 'score'; emit(directory, 'panic', { event: 'panic_level_change', + tool, from_level: prevLevel, to_level: tracker.panicLevel, + score_before: scoreBefore, panic_score: tracker.panicScore, density, oscillation, stale_depth: staleDepth, - trigger, - call_triggers: callTriggers, + in_refractory: inRefractory, + trigger: levelTrigger, + provenance, }); } @@ -389,6 +429,7 @@ export function createTracker(directory: string): EpistemicTracker { interventionCountSinceStable: 0, lastPanicUpdateAt: 0, panicTriggers: [], + panicRecoverySuppressionUntil: 0, }; } @@ -422,6 +463,11 @@ function resetTracker(tracker: EpistemicTracker, directory: string): void { tracker.interventionCountSinceStable = 0; tracker.panicTriggers = []; } + // Set refractory window when orient() achieves actual score reduction. + // Suppresses upward signals for 45s to let recovery land before re-escalating. + if (panicDelta < 0) { + tracker.panicRecoverySuppressionUntil = now + PANIC_REFRACTORY_MS; + } emit(directory, 'panic', { event: 'panic_orient_reset', @@ -503,7 +549,7 @@ export function updateTracker( density, oscillation, age_min: Math.floor(ageMs / 60_000), trigger: 'burst', }); tracker.staleDepth = 3; - updatePanic(tracker, { density, oscillation, weight, staleDepth: tracker.staleDepth, directory }); + updatePanic(tracker, { density, oscillation, weight, staleDepth: tracker.staleDepth, directory, tool: toolName }); return; } const newDepth = computeStaleDepth(tracker.cognitiveLoad, ageMs); @@ -515,7 +561,7 @@ export function updateTracker( }); tracker.staleDepth = newDepth as StaleDepth; } - updatePanic(tracker, { density, oscillation, weight, staleDepth: tracker.staleDepth, directory }); + updatePanic(tracker, { density, oscillation, weight, staleDepth: tracker.staleDepth, directory, tool: toolName }); return; } @@ -571,7 +617,7 @@ export function updateTracker( emit(directory, 'epistemic-lease', { event: 'degraded', trigger, ...telCtx }); } - updatePanic(tracker, { density, 
oscillation, weight, staleDepth: tracker.staleDepth, directory }); + updatePanic(tracker, { density, oscillation, weight, staleDepth: tracker.staleDepth, directory, tool: toolName }); } // ============================================================================ @@ -684,6 +730,9 @@ export function trackerToPanicState(tracker: EpistemicTracker, agentId?: string, localityConfidence: tracker.localityConfidence, interventionCountSinceStable: tracker.interventionCountSinceStable, triggers: [...tracker.panicTriggers], + panicRecoverySuppressionUntil: tracker.panicRecoverySuppressionUntil > Date.now() + ? new Date(tracker.panicRecoverySuppressionUntil).toISOString() + : undefined, agentId, sessionId, }; diff --git a/src/core/services/mcp-handlers/panic-response.ts b/src/core/services/mcp-handlers/panic-response.ts index 834c038b..91ccd893 100644 --- a/src/core/services/mcp-handlers/panic-response.ts +++ b/src/core/services/mcp-handlers/panic-response.ts @@ -30,6 +30,8 @@ export interface PanicState { localityConfidence: number; interventionCountSinceStable: number; triggers: string[]; + /** ISO — upward signals suppressed until this timestamp after an orient() recovery. 
*/ + panicRecoverySuppressionUntil?: string; agentId?: string; sessionId?: string; } From 76119eb5214a3d49f450242d996cef26164e673d Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Wed, 20 May 2026 21:47:35 +0200 Subject: [PATCH 06/22] feat(panic): locality confidence modulation of panic and stale escalation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit localityConfidence now uses both density and oscillation (not just density): localityConfidence = (1 - min(1, density×2)) × (1 - min(1, oscillation)) Locality gates two signals: - stale_depth_3 (+25/call): only fires when localityConfidence < 0.5 - burst escalation (depth → 3): only fires when localityConfidence < 0.5 A stale agent doing focused local work (high confidence) is not in the same risk category as a stale agent drifting cross-module. Suppressing these signals prevents the panic layer from treating coherent deep work as a crisis. Spec: add Behavioral Space section documenting the five independent dimensions and the interpretation matrix (8 situations). Add Locality Confidence Modulation subsection with formula, gating table, and rationale for appropriate vs maximum tool utilization. --- openspec/changes/panic-response-layer.md | 51 +++++++++++++++++++ .../services/mcp-handlers/epistemic-lease.ts | 20 ++++++-- 2 files changed, 66 insertions(+), 5 deletions(-) diff --git a/openspec/changes/panic-response-layer.md b/openspec/changes/panic-response-layer.md index 56b1e274..030ab1aa 100644 --- a/openspec/changes/panic-response-layer.md +++ b/openspec/changes/panic-response-layer.md @@ -35,6 +35,33 @@ block agent operation. System correctness MUST NOT depend on hook execution. --- +## Behavioral Space + +Five independent dimensions describe agent state. 
They can be opposed: + +| Situation | Interpretation | +|-----------|---------------| +| low density + low entropy | focused local work (coherent, no intervention needed) | +| high density + high entropy | productive exploration (risky but coherent) | +| high density + low entropy | panic probable (drift, retry loop) | +| low density + high oscillation | local contradiction (A↔B without progress) | +| stale + low panic | deep stale dive (known risk, focused) | +| fresh + high panic | recent orient(), still confused | +| stale + high locality confidence | locally coherent stale work (low risk) | +| stale + low locality confidence + drift | architectural isolation risk | + +**Absence of openlore calls is not a failure signal.** An agent doing focused local work with +high locality confidence has low need for orient() or graph traversal. The working set fits +in active context. Tool utilization is a proxy — the target is *appropriate* tool utilization, +not maximum utilization. + +The dangerous case is not "0 openlore calls" but: +``` +many files + large patches + oscillation + retry loops + cross-module + failure traces +AND no openlore calls +``` +That is architectural isolation risk. Focused single-file work with no orient() is rational. + ## Freshness vs Panic — Explicit Separation These are independent dimensions of epistemic state: @@ -175,6 +202,30 @@ treated as stable state (fail open). | Oscillation spike (osc ≥ 0.50) | +10 | | Stale depth 3 persistence (each call) | +25 | +### Locality Confidence Modulation + +`localityConfidence ∈ [0,1]` is computed from both density and oscillation: + +``` +localityConfidence = (1 - min(1, density × 2)) × (1 - min(1, oscillation)) +``` + +High localityConfidence = sustained coherent local work. 
It modulates the panic system: + +| Signal | Gating | +|--------|--------| +| `stale_depth_3` (+25/call) | only fires when `localityConfidence < 0.5` | +| burst escalation (depth → 3) | only fires when `localityConfidence < 0.5` | +| locality recovery (−3/call) | fires when `density < 0.10 && oscillation < 0.10 && staleDepth = 0` | + +**Rationale:** a stale agent doing focused local work (`staleDepth = 3` but `localityConfidence = 0.9`) +is not in the same risk category as a stale agent drifting cross-module. Suppressing the +`stale_depth_3` signal in that case prevents the panic system from treating coherent deep +work as a destabilization event. + +This also means the system does NOT maximize orient() calls. It maximizes appropriate +recontextualization — only when the behavioral signals indicate it is actually needed. + **Trajectory tracking continues while stale.** Module access window and oscillation score accumulate during stale state so that post-stale burst and trajectory patterns remain observable. The stale state does not freeze the behavioral model. diff --git a/src/core/services/mcp-handlers/epistemic-lease.ts b/src/core/services/mcp-handlers/epistemic-lease.ts index 7959735c..8757cb7b 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.ts @@ -212,6 +212,10 @@ function updatePanic( provenance.push({ name: 'passive_decay', delta: decayDelta, evidence: { elapsed_min: Math.round(elapsedMin * 100) / 100 } }); } + // localityConfidence captures sustained coherent local work — both density and oscillation + // independently degrade it. Computed here (before signal gating) so it can modulate signals. 
+ const localityConfidence = Math.max(0, (1 - Math.min(1, density * 2)) * (1 - Math.min(1, oscillation))); + // Upward signals — suppressed during refractory period after orient() recovery if (!inRefractory) { if (density >= PANIC_TRAJECTORY_DENSITY) { @@ -224,10 +228,12 @@ function updatePanic( delta += d; provenance.push({ name: 'oscillation_spike', delta: d, evidence: { oscillation } }); } - if (staleDepth >= 3) { + // stale_depth_3 signal gated by localityConfidence: a stale agent doing focused local + // work (high confidence) is much less risky than a stale agent in behavioral drift. + if (staleDepth >= 3 && localityConfidence < 0.5) { const d = 25; delta += d; - provenance.push({ name: 'stale_depth_3', delta: d, evidence: { stale_depth: staleDepth } }); + provenance.push({ name: 'stale_depth_3', delta: d, evidence: { stale_depth: staleDepth, locality_confidence: localityConfidence } }); } } @@ -241,7 +247,7 @@ function updatePanic( const scoreBefore = tracker.panicScore; tracker.lastPanicUpdateAt = now; tracker.panicScore = Math.min(PANIC_SCORE_MAX, Math.max(0, tracker.panicScore + delta)); - tracker.localityConfidence = Math.max(0, 1 - density * 2); + tracker.localityConfidence = localityConfidence; // Accumulate trigger names for the current episode (upward signals only) const upwardTriggers = provenance.filter(p => p.delta > 0).map(p => p.name); @@ -541,12 +547,16 @@ export function updateTracker( // Already stale — time-based depth escalation only, plus V3.2 burst sensitivity. // Load stops accumulating here; burst detection uses tool weight and density instead. if (tracker.freshnessState === 'stale') { - // Post-stale burst: heavy architectural tool or trajectory burst → immediate depth 3 - if (tracker.staleDepth < 3 && (weight >= BURST_TOOL_WEIGHT_THRESHOLD || density >= BURST_DENSITY_THRESHOLD)) { + // Post-stale burst: heavy architectural tool or trajectory burst → immediate depth 3. 
+ // Gated by localityConfidence: a stale agent doing focused local work is not bursting. + // High confidence (≥0.5) suppresses burst escalation — only clear behavioral drift triggers it. + const isBurst = weight >= BURST_TOOL_WEIGHT_THRESHOLD || density >= BURST_DENSITY_THRESHOLD; + if (tracker.staleDepth < 3 && isBurst && tracker.localityConfidence < 0.5) { emit(directory, 'epistemic-lease', { event: 'depth_escalate', from_depth: tracker.staleDepth, to_depth: 3, tool: toolName, module: mod, cognitive_load: tracker.cognitiveLoad, density, oscillation, age_min: Math.floor(ageMs / 60_000), trigger: 'burst', + locality_confidence: tracker.localityConfidence, }); tracker.staleDepth = 3; updatePanic(tracker, { density, oscillation, weight, staleDepth: tracker.staleDepth, directory, tool: toolName }); From 2aa5b5ba6914010ae34735f189f95e1d2c834c1b Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Wed, 20 May 2026 21:57:42 +0200 Subject: [PATCH 07/22] test(telemetry): unit tests for panic/lease metric aggregation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export computePanicStats, computeRecovery, computeObstinacy, and their types for direct testing. 
24 tests covering: - computePanicStats: episode counting, avg_recovery_ms (completed only), failed_recovery_rate, hook_intercepts, mcp_injections, orient spam/rapid, gryph_enriched_intercepts, trigger frequency + sort order, out-of-order chronological events, non-level-change event isolation - computeRecovery: stale→orient latency avg, multi-pair avg, null when no subsequent orient, recovery half-life, recurrence rate, edge cases - computeObstinacy: tool call counting per episode, depth tracking, open episode handling, multiple episode separation --- src/cli/commands/telemetry.test.ts | 328 +++++++++++++++++++++++++++++ src/cli/commands/telemetry.ts | 4 + 2 files changed, 332 insertions(+) create mode 100644 src/cli/commands/telemetry.test.ts diff --git a/src/cli/commands/telemetry.test.ts b/src/cli/commands/telemetry.test.ts new file mode 100644 index 00000000..b912fe93 --- /dev/null +++ b/src/cli/commands/telemetry.test.ts @@ -0,0 +1,328 @@ +/** + * Validates panic/lease telemetry metric aggregation with synthetic JSONL events. + * Tests computePanicStats, computeRecovery, computeObstinacy directly. 
+ */ + +import { describe, it, expect } from 'vitest'; +import { computePanicStats, computeRecovery, computeObstinacy } from './telemetry.js'; +import type { PanicEvent, LeaseEvent, McpEvent } from './telemetry.js'; + +// ── helpers ────────────────────────────────────────────────────────────────── + +function ts(offsetMs: number = 0): string { + return new Date(1_700_000_000_000 + offsetMs).toISOString(); +} + +function levelChange(from: number, to: number, offsetMs: number, extra?: Partial): PanicEvent { + return { ts: ts(offsetMs), event: 'panic_level_change', from_level: from, to_level: to, ...extra }; +} + +function orientReset(kind: 'normal' | 'rapid' | 'spam', offsetMs: number): PanicEvent { + return { ts: ts(offsetMs), event: 'panic_orient_reset', orient_kind: kind }; +} + +function hookIntervention(offsetMs: number, gryph = false, count = 1): PanicEvent { + return { ts: ts(offsetMs), event: 'hook_intervention', intervention_count: count, gryph_enriched: gryph }; +} + +function injection(offsetMs: number): PanicEvent { + return { ts: ts(offsetMs), event: 'panic_signal_injected' }; +} + +// ── computePanicStats ──────────────────────────────────────────────────────── + +describe('computePanicStats', () => { + it('returns zeros on empty input', () => { + const r = computePanicStats([]); + expect(r.panic_episodes).toBe(0); + expect(r.avg_recovery_ms).toBeNull(); + expect(r.failed_recovery_rate).toBe('—'); + expect(r.hook_intercepts).toBe(0); + expect(r.mcp_injections).toBe(0); + expect(r.orient_spam_events).toBe(0); + expect(r.orient_rapid_events).toBe(0); + expect(r.gryph_enriched_intercepts).toBe(0); + expect(r.trigger_counts).toHaveLength(0); + }); + + it('counts a completed episode (0→L2→0)', () => { + const events: PanicEvent[] = [ + levelChange(0, 2, 0), + levelChange(2, 0, 60_000), + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(1); + expect(r.avg_recovery_ms).toBe(60_000); + expect(r.failed_recovery_rate).toBe('0/1'); + }); 
+ + it('measures avg recovery latency over multiple completed episodes', () => { + const events: PanicEvent[] = [ + levelChange(0, 1, 0), + levelChange(1, 0, 30_000), // 30s episode + levelChange(0, 3, 100_000), + levelChange(3, 0, 190_000), // 90s episode + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(2); + expect(r.avg_recovery_ms).toBe(60_000); // (30000 + 90000) / 2 + }); + + it('tracks peak level within an episode', () => { + // level escalates within episode + const events: PanicEvent[] = [ + levelChange(0, 1, 0), + levelChange(1, 3, 10_000), // escalation mid-episode + levelChange(3, 0, 60_000), + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(1); + expect(r.failed_recovery_rate).toBe('0/1'); + }); + + it('counts failed recovery: episode with no return to L0', () => { + const events: PanicEvent[] = [ + levelChange(0, 2, 0), + // no return to 0 + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(1); + expect(r.failed_recovery_rate).toBe('1/1'); + expect(r.avg_recovery_ms).toBeNull(); // no completed episodes + }); + + it('mixed: 1 completed + 1 failed → correct rate and avg', () => { + const events: PanicEvent[] = [ + levelChange(0, 2, 0), + levelChange(2, 0, 45_000), // completed: 45s + levelChange(0, 3, 200_000), // new episode, never closes + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(2); + expect(r.failed_recovery_rate).toBe('1/2'); + expect(r.avg_recovery_ms).toBe(45_000); // only completed episode + }); + + it('counts hook intercepts and mcp injections', () => { + const events: PanicEvent[] = [ + hookIntervention(0), + hookIntervention(5_000), + injection(10_000), + injection(15_000), + injection(20_000), + ]; + const r = computePanicStats(events); + expect(r.hook_intercepts).toBe(2); + expect(r.mcp_injections).toBe(3); + }); + + it('counts orient spam and rapid events', () => { + const events: PanicEvent[] = [ + orientReset('normal', 0), + 
orientReset('rapid', 30_000), + orientReset('rapid', 60_000), + orientReset('spam', 90_000), + orientReset('spam', 120_000), + ]; + const r = computePanicStats(events); + expect(r.orient_spam_events).toBe(2); + expect(r.orient_rapid_events).toBe(2); + }); + + it('counts gryph-enriched hook intercepts', () => { + const events: PanicEvent[] = [ + hookIntervention(0, false), + hookIntervention(5_000, true), + hookIntervention(10_000, true), + ]; + const r = computePanicStats(events); + expect(r.hook_intercepts).toBe(3); + expect(r.gryph_enriched_intercepts).toBe(2); + }); + + it('aggregates trigger frequency from call_triggers', () => { + const events: PanicEvent[] = [ + { ts: ts(0), event: 'hook_intervention', call_triggers: ['trajectory_burst', 'oscillation_spike'] }, + { ts: ts(5_000), event: 'hook_intervention', call_triggers: ['trajectory_burst'] }, + { ts: ts(10_000), event: 'hook_intervention', call_triggers: ['stale_depth_3'] }, + ]; + const r = computePanicStats(events); + const tmap = new Map(r.trigger_counts); + expect(tmap.get('trajectory_burst')).toBe(2); + expect(tmap.get('oscillation_spike')).toBe(1); + expect(tmap.get('stale_depth_3')).toBe(1); + // sorted descending by count + expect(r.trigger_counts[0][0]).toBe('trajectory_burst'); + }); + + it('handles level changes that arrive out of chronological order', () => { + // sort should handle this + const events: PanicEvent[] = [ + levelChange(2, 0, 60_000), // end of episode (arrives first in array) + levelChange(0, 2, 0), // start + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(1); + expect(r.avg_recovery_ms).toBe(60_000); + expect(r.failed_recovery_rate).toBe('0/1'); + }); + + it('ignores non-level-change events for episode tracking', () => { + const events: PanicEvent[] = [ + hookIntervention(0), + orientReset('normal', 10_000), + injection(20_000), + // no level changes → no episodes + ]; + const r = computePanicStats(events); + expect(r.panic_episodes).toBe(0); + 
expect(r.avg_recovery_ms).toBeNull(); + }); +}); + +// ── computeRecovery ─────────────────────────────────────────────────────────── + +describe('computeRecovery', () => { + function staleEvent(offsetMs: number): LeaseEvent { + return { ts: ts(offsetMs), event: 'stale', depth: 1 }; + } + function orientReset(offsetMs: number): LeaseEvent { + return { ts: ts(offsetMs), event: 'orient_reset', prior_load: 0, prior_depth: 1 }; + } + function orientCall(offsetMs: number): McpEvent { + return { ts: ts(offsetMs), event: 'tool_call', tool: 'orient', ms: 50 }; + } + function toolCall(name: string, offsetMs: number): McpEvent { + return { ts: ts(offsetMs), event: 'tool_call', tool: name, ms: 20 }; + } + function degraded(offsetMs: number): LeaseEvent { + return { ts: ts(offsetMs), event: 'degraded' }; + } + + it('computes avg stale→orient latency', () => { + const lease: LeaseEvent[] = [staleEvent(0)]; + const mcp: McpEvent[] = [orientCall(45_000)]; + const r = computeRecovery(mcp, lease); + expect(r.avg_recovery_ms).toBe(45_000); + expect(r.stale_events).toBe(1); + expect(r.orient_calls).toBe(1); + }); + + it('averages latency over multiple stale→orient pairs', () => { + const lease: LeaseEvent[] = [staleEvent(0), staleEvent(100_000)]; + const mcp: McpEvent[] = [orientCall(60_000), orientCall(130_000)]; + const r = computeRecovery(mcp, lease); + expect(r.avg_recovery_ms).toBe(45_000); // (60000 + 30000) / 2 + }); + + it('returns null avg when no stale event has a subsequent orient', () => { + const lease: LeaseEvent[] = [staleEvent(100_000)]; + const mcp: McpEvent[] = [orientCall(0)]; // orient before stale + const r = computeRecovery(mcp, lease); + expect(r.avg_recovery_ms).toBeNull(); + }); + + it('computes recovery half-life (orient_reset → next degradation)', () => { + const lease: LeaseEvent[] = [orientReset(0), degraded(90_000)]; + const mcp: McpEvent[] = []; + const r = computeRecovery(mcp, lease); + expect(r.avg_stable_after_orient_ms).toBe(90_000); + }); + + 
it('returns null half-life when no degradation follows reset', () => { + const lease: LeaseEvent[] = [orientReset(0)]; + const mcp: McpEvent[] = []; + const r = computeRecovery(mcp, lease); + expect(r.avg_stable_after_orient_ms).toBeNull(); + }); + + it('computes correct recurrence rate', () => { + const lease: LeaseEvent[] = [staleEvent(0), staleEvent(200_000)]; + const mcp: McpEvent[] = [orientCall(100_000)]; + const r = computeRecovery(mcp, lease); + expect(r.stale_events).toBe(2); + expect(r.orient_calls).toBe(1); + expect(r.recurrence_rate).toBe('2.00 stale/orient'); + }); + + it('returns — for recurrence rate when no orients', () => { + const lease: LeaseEvent[] = [staleEvent(0)]; + const mcp: McpEvent[] = []; + const r = computeRecovery(mcp, lease); + expect(r.recurrence_rate).toBe('—'); + }); +}); + +// ── computeObstinacy ───────────────────────────────────────────────────────── + +describe('computeObstinacy', () => { + function staleEvent(depth: number, offsetMs: number): LeaseEvent { + return { ts: ts(offsetMs), event: 'stale', depth }; + } + function orientResetEvent(offsetMs: number): LeaseEvent { + return { ts: ts(offsetMs), event: 'orient_reset' }; + } + function toolCall(name: string, offsetMs: number): McpEvent { + return { ts: ts(offsetMs), event: 'tool_call', tool: name, ms: 10 }; + } + + it('returns zeros on empty input', () => { + const r = computeObstinacy([], []); + expect(r.total_stale_episodes).toBe(0); + expect(r.avg_calls_before_orient).toBe('—'); + }); + + it('counts tool calls between stale and orient_reset', () => { + const lease: LeaseEvent[] = [staleEvent(1, 0), orientResetEvent(50_000)]; + const mcp: McpEvent[] = [ + toolCall('search_code', 10_000), + toolCall('get_subgraph', 20_000), + toolCall('orient', 50_000), // orient itself, counts as orient kind + ]; + const r = computeObstinacy(mcp, lease); + expect(r.total_stale_episodes).toBe(1); + // 2 non-orient tool calls before orient_reset + 
expect(r.episodes[0].calls_before_orient).toBe(2); + }); + + it('tracks max depth within episode', () => { + const lease: LeaseEvent[] = [ + staleEvent(1, 0), + staleEvent(2, 10_000), // depth escalation mid-episode + orientResetEvent(60_000), + ]; + const mcp: McpEvent[] = []; + const r = computeObstinacy(mcp, lease); + expect(r.total_stale_episodes).toBe(1); + expect(r.episodes[0].depth).toBe(2); + }); + + it('counts open episode (no orient at end) as last segment', () => { + const lease: LeaseEvent[] = [staleEvent(1, 0)]; + const mcp: McpEvent[] = [ + toolCall('search_code', 10_000), + toolCall('get_subgraph', 20_000), + ]; + const r = computeObstinacy(mcp, lease); + expect(r.total_stale_episodes).toBe(1); + expect(r.episodes[0].calls_before_orient).toBe(2); + }); + + it('handles multiple separate stale episodes', () => { + const lease: LeaseEvent[] = [ + staleEvent(1, 0), + orientResetEvent(30_000), + staleEvent(2, 60_000), + orientResetEvent(90_000), + ]; + const mcp: McpEvent[] = [ + toolCall('search_code', 10_000), + toolCall('search_code', 70_000), + toolCall('search_code', 80_000), + ]; + const r = computeObstinacy(mcp, lease); + expect(r.total_stale_episodes).toBe(2); + expect(r.episodes[0].calls_before_orient).toBe(1); + expect(r.episodes[1].calls_before_orient).toBe(2); + }); +}); diff --git a/src/cli/commands/telemetry.ts b/src/cli/commands/telemetry.ts index 05ae278c..2cd57b10 100644 --- a/src/cli/commands/telemetry.ts +++ b/src/cli/commands/telemetry.ts @@ -199,6 +199,10 @@ function computeRecovery(mcp: McpEvent[], lease: LeaseEvent[]) { }; } +// Exported for testing +export type { PanicEvent, LeaseEvent, McpEvent }; +export { computePanicStats, computeRecovery, computeObstinacy }; + /** * Panic stats: episode count, avg recovery latency, hook intercepts, orient spam. 
*/ From 67e8a84e1dfe2a4bc8ef449c1e08c75b7e914e9d Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Thu, 21 May 2026 19:49:42 +0200 Subject: [PATCH 08/22] test(panic): signal detection, refractory, locality, burst escalation tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also fix computeOscillationScore to operate over transition sequence — A→A→A→A was scoring 1.0 (same as A→B→A→B confusion loop) because the old formula checked bigrams across the full window. Focused single-module work should produce oscillation=0. --- .../mcp-handlers/epistemic-lease.test.ts | 207 ++++++++++++++++++ .../services/mcp-handlers/epistemic-lease.ts | 16 +- 2 files changed, 218 insertions(+), 5 deletions(-) diff --git a/src/core/services/mcp-handlers/epistemic-lease.test.ts b/src/core/services/mcp-handlers/epistemic-lease.test.ts index 44bb32b9..e214196a 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.test.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.test.ts @@ -740,6 +740,213 @@ describe('panic — orient spam protection', () => { }); }); +// ============================================================================ +// Panic — signal detection (trajectory_burst, oscillation_spike, stale_depth_3) +// ============================================================================ + +describe('panic — individual signal detection', () => { + beforeEach(() => { vi.useFakeTimers(); }); + afterEach(() => { vi.useRealTimers(); }); + + it('trajectory_burst (+15) fires when density >= 0.60', () => { + const t = freshTracker(); + t.panicScore = 0; + // Fill window with dense cross-module switching (10 distinct modules in 15 slots) + t.moduleAccessWindow = [ + 'a','b','c','d','e','f','g','h','i','j','a','b','c','d','e', + ] as (string|null)[]; + t.lastModule = 'e'; + // density = 14 switches / 15 = 0.93 → trajectory_burst fires + updateTracker(t, 'search_code', '/fake/repo', 'src/f/x.ts'); + // +15 trajectory_burst 
(oscillation may also add +10 if ≥0.50) + expect(t.panicScore).toBeGreaterThanOrEqual(15); + }); + + it('oscillation_spike (+10) fires when oscillation >= 0.50', () => { + const t = freshTracker(); + t.panicScore = 0; + // Pure A→B bigram repetition → oscillation = 1.0 + const window: (string|null)[] = []; + for (let i = 0; i < 14; i++) window.push(i % 2 === 0 ? 'auth' : 'billing'); + t.moduleAccessWindow = window; + t.lastModule = 'billing'; + // This call adds 'auth', creating another A→B→A bigram → oscillation stays high + updateTracker(t, 'search_code', '/fake/repo', 'src/auth/x.ts'); + // oscillation_spike (+10) + trajectory_burst (+15) both fire + expect(t.panicScore).toBeGreaterThanOrEqual(10); + }); + + it('stale_depth_3 (+25) fires when staleDepth=3 AND localityConfidence < 0.5', () => { + const t = freshTracker(); + t.panicScore = 0; + t.freshnessState = 'stale'; + t.staleDepth = 3; + // Build low localityConfidence via high density + oscillation in window + t.moduleAccessWindow = ['a','b','a','b','a','b','a','b','a','b','a','b','a','b','a'] as (string|null)[]; + t.localityConfidence = 0.1; // already low from previous calls — gate should open + updateTracker(t, 'search_code', '/fake/repo', 'src/c/x.ts'); + // trajectory_burst + oscillation_spike + stale_depth_3 all fire + expect(t.panicScore).toBeGreaterThanOrEqual(25); + }); + + it('stale_depth_3 does NOT fire when localityConfidence >= 0.5 (focused stale work)', () => { + const t = freshTracker(); + t.panicScore = 0; + t.freshnessState = 'stale'; + t.staleDepth = 3; + // Empty window → density=0, oscillation=0 → localityConfidence=1.0 + t.moduleAccessWindow = []; + t.localityConfidence = 1.0; + updateTracker(t, 'search_code', '/fake/repo'); // no filePath → stays in same module + // stale_depth_3 gate blocked; only decay/locality_recovery may apply + // score should not increase (no upward signals fire at high localityConfidence) + expect(t.panicScore).toBe(0); + }); + + it('locality_recovery (-3) 
fires when density < 0.10, oscillation < 0.10, staleDepth = 0', () => { + const t = freshTracker(); + t.panicScore = 20; + t.moduleAccessWindow = []; // empty → density=0, oscillation=0 + t.localityConfidence = 1.0; + t.staleDepth = 0; + updateTracker(t, 'search_code', '/fake/repo'); // no cross-module activity + // locality_recovery (-3) fires; panicScore should drop + expect(t.panicScore).toBeLessThan(20); + }); +}); + +// ============================================================================ +// Panic — refractory period +// ============================================================================ + +describe('panic — refractory period after orient()', () => { + beforeEach(() => { vi.useFakeTimers(); }); + afterEach(() => { vi.useRealTimers(); }); + + it('orient() sets panicRecoverySuppressionUntil when score reduces', () => { + const t = freshTracker(); + t.panicScore = 50; + vi.advanceTimersByTime(3 * 60 * 1000); + updateTracker(t, 'orient', '/fake/repo'); // -40 → score=10, refractory set + expect(t.panicRecoverySuppressionUntil).toBeGreaterThan(Date.now()); + }); + + it('spam orient (delta=0) does NOT set refractory', () => { + const t = freshTracker(); + t.panicScore = 50; + t.lastOrientResetAt = Date.now() - 30_000; + t.recentOrientCount = 2; // 3rd rapid → spam → delta=0 + updateTracker(t, 'orient', '/fake/repo'); + expect(t.panicRecoverySuppressionUntil).toBe(0); // not set + }); + + it('upward signals suppressed during refractory window', () => { + const t = freshTracker(); + t.panicScore = 50; + vi.advanceTimersByTime(3 * 60 * 1000); + updateTracker(t, 'orient', '/fake/repo'); // sets refractory + const scoreAfterOrient = t.panicScore; + + // Now trigger high density + oscillation conditions + t.moduleAccessWindow = ['a','b','a','b','a','b','a','b','a','b','a','b','a','b','a'] as (string|null)[]; + t.lastModule = 'a'; + t.freshnessState = 'stale'; + t.staleDepth = 3; + updateTracker(t, 'trace_execution_path', '/fake/repo', 'src/b/x.ts'); + + 
// Upward signals blocked by refractory — score should not increase above post-orient value + // (may decrease from decay/locality_recovery, but not increase) + expect(t.panicScore).toBeLessThanOrEqual(scoreAfterOrient); + }); + + it('upward signals resume after refractory window expires', () => { + const t = freshTracker(); + t.panicScore = 50; + vi.advanceTimersByTime(3 * 60 * 1000); + updateTracker(t, 'orient', '/fake/repo'); // sets refractory + const scoreAfterOrient = t.panicScore; + + // Advance past the 45s refractory window + vi.advanceTimersByTime(50_000); + t.panicRecoverySuppressionUntil = Date.now() - 1; // force expiry + + // Now trigger burst conditions + t.moduleAccessWindow = ['a','b','a','b','a','b','a','b','a','b','a','b','a','b','a'] as (string|null)[]; + t.lastModule = 'a'; + t.localityConfidence = 0.0; + updateTracker(t, 'trace_execution_path', '/fake/repo', 'src/b/x.ts'); + + // Signals should now fire → score increases + expect(t.panicScore).toBeGreaterThan(scoreAfterOrient); + }); +}); + +// ============================================================================ +// Panic — localityConfidence formula +// ============================================================================ + +describe('panic — localityConfidence formula', () => { + it('high oscillation alone degrades localityConfidence even at low density', () => { + const t = freshTracker(); + // Fill with same-module oscillation: stays in 'auth', no cross-module switches + // but builds up bigram repetition + t.moduleAccessWindow = ['auth','auth','auth','auth','auth','auth','auth','auth', + 'auth','auth','auth','auth','auth','auth','auth'] as (string|null)[]; + // density = 0 (no switches), oscillation = 0 (same module, no bigram repetition) + updateTracker(t, 'search_code', '/fake/repo', 'src/auth/x.ts'); + // low density, low oscillation (all same module) → localityConfidence near 1 + expect(t.localityConfidence).toBeGreaterThan(0.9); + }); + + it('high density alone 
degrades localityConfidence', () => { + const t = freshTracker(); + // Dense cross-module switching, no oscillation (linear A→B→C→D) + t.moduleAccessWindow = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o'] as (string|null)[]; + t.lastModule = 'o'; + updateTracker(t, 'search_code', '/fake/repo', 'src/p/x.ts'); + // High density → localityConfidence degrades toward 0 + expect(t.localityConfidence).toBeLessThan(0.2); + }); + + it('both density and oscillation combine multiplicatively', () => { + const t = freshTracker(); + // A→B oscillation (high both density and oscillation) + t.moduleAccessWindow = ['a','b','a','b','a','b','a','b','a','b','a','b','a','b','a'] as (string|null)[]; + t.lastModule = 'a'; + updateTracker(t, 'search_code', '/fake/repo', 'src/b/x.ts'); + // Both density and oscillation high → confidence very low (multiplicative kill) + expect(t.localityConfidence).toBeLessThan(0.1); + }); +}); + +// ============================================================================ +// Panic — burst escalation gated by localityConfidence +// ============================================================================ + +describe('panic — burst escalation gate', () => { + it('burst (heavy tool on stale) does NOT escalate to depth 3 at high localityConfidence', () => { + const t = freshTracker(); + t.freshnessState = 'stale'; + t.staleDepth = 1; + t.localityConfidence = 0.9; // high confidence — focused work + t.moduleAccessWindow = []; // empty → density=0 + // trace_execution_path has weight 8 → burst condition met (weight >= BURST_TOOL_WEIGHT_THRESHOLD) + updateTracker(t, 'trace_execution_path', '/fake/repo'); + // Burst escalation blocked by high localityConfidence + expect(t.staleDepth).toBeLessThan(3); + }); + + it('burst escalates to depth 3 when localityConfidence < 0.5', () => { + const t = freshTracker(); + t.freshnessState = 'stale'; + t.staleDepth = 1; + t.localityConfidence = 0.1; // low confidence — drift + // trace_execution_path 
(weight=8) → burst condition met + updateTracker(t, 'trace_execution_path', '/fake/repo'); + expect(t.staleDepth).toBe(3); + }); +}); + // ============================================================================ // trackerToPanicState // ============================================================================ diff --git a/src/core/services/mcp-handlers/epistemic-lease.ts b/src/core/services/mcp-handlers/epistemic-lease.ts index 8757cb7b..69ff7921 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.ts @@ -388,13 +388,19 @@ function computeCrossModuleDensity(window: (string | null)[]): number { function computeOscillationScore(window: (string | null)[]): number { const modules = window.filter((m): m is string => m !== null); if (modules.length < 3) return 0; + // Compute over transition sequence (entries where module actually changed). + // A→A→A→A → 0 transitions → oscillation = 0 (focused local work, not confusion). + // A→B→A→B → transitions [A,B,A,B] → oscillation = 1.0 (pure confusion loop). + const transitions: string[] = [modules[0]!]; + for (let i = 1; i < modules.length; i++) { + if (modules[i] !== modules[i - 1]) transitions.push(modules[i]!); + } + if (transitions.length < 3) return 0; let repeated = 0; - let total = 0; - for (let i = 2; i < modules.length; i++) { - total++; - if (modules[i] === modules[i - 2]) repeated++; + for (let i = 2; i < transitions.length; i++) { + if (transitions[i] === transitions[i - 2]) repeated++; } - return total > 0 ? 
repeated / total : 0; + return repeated / (transitions.length - 2); } // ============================================================================ From 80a8abb24dcf6619c7476ce34bccfd169f471ab7 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Thu, 21 May 2026 21:33:10 +0200 Subject: [PATCH 09/22] feat(panic): opt-in panic response with graduated mode ladder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add PanicResponseMode ('off'|'telemetry'|'advisory'|'experimental_blocking') to OpenLoreConfig. Default: 'off'. Existing users unaffected. Architecture: - Policy lives in mcp.ts only — never stored in EpistemicTracker or panic-state.json - updatePanic() extracted from updateTracker() and exported; mcp.ts calls it conditionally based on policy mode - localityConfidence moved into updateTracker() — it's navigation state, not panic - tracker.density stored after each updateTracker() so mcp.ts can pass it to updatePanic() Mode semantics: - off: zero panic overhead (no scoring, no file writes, no Gryph) - telemetry: scoring + state file, no agent impact - advisory: + injection into responses at L2+, hook exits 0 always - experimental_blocking: + hook emits {"decision":"block"} at L4, still exits 0 panic-check always exits 0 — fail-open invariant. Runtime decides enforcement. setup --panic sets config; --hooks remains independent. 
--- src/cli/commands/mcp.ts | 24 +++- src/cli/commands/panic-check.ts | 105 +++++++++++------- src/cli/commands/setup.ts | 26 ++++- src/core/services/config-manager.ts | 1 + .../mcp-handlers/epistemic-lease.test.ts | 45 +++++--- .../services/mcp-handlers/epistemic-lease.ts | 20 ++-- src/types/index.ts | 11 ++ 7 files changed, 163 insertions(+), 69 deletions(-) diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts index 0ba161b2..5485ee85 100644 --- a/src/cli/commands/mcp.ts +++ b/src/cli/commands/mcp.ts @@ -31,10 +31,12 @@ import { } from '@modelcontextprotocol/sdk/types.js'; import { sanitizeMcpError, validateDirectory } from '../../core/services/mcp-handlers/utils.js'; -import { createTracker, updateTracker, getFreshnessSignal, trackerToPanicState } from '../../core/services/mcp-handlers/epistemic-lease.js'; +import { createTracker, updateTracker, updatePanic, getFreshnessSignal, trackerToPanicState } from '../../core/services/mcp-handlers/epistemic-lease.js'; import type { EpistemicTracker } from '../../core/services/mcp-handlers/epistemic-lease.js'; +import type { PanicResponseMode } from '../../types/index.js'; import { writePanicState, getPanicSignalText } from '../../core/services/mcp-handlers/panic-response.js'; import { emit } from '../../core/services/telemetry.js'; +import { readOpenLoreConfig } from '../../core/services/config-manager.js'; import { DEFAULT_DRIFT_MAX_FILES } from '../../constants.js'; import { handleGetCallGraph, @@ -1307,6 +1309,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { // Per-session epistemic lease tracker — re-initialized when directory changes. 
let tracker: EpistemicTracker | undefined; let trackerDir = ''; + let panicPolicy: PanicResponseMode = 'off'; // --watch-auto: start the watcher on the first tool call that carries a directory let autoWatcher: import('../../core/services/mcp-watcher.js').McpWatcher | undefined; @@ -1355,6 +1358,11 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { if (directory && (!tracker || directory !== trackerDir)) { tracker = createTracker(directory); trackerDir = directory; + const cfg = await readOpenLoreConfig(directory); + panicPolicy = cfg?.panicResponse?.mode ?? 'off'; + if (panicPolicy !== 'off') { + emit(directory, 'panic-response', { event: 'panic_mode_active', mode: panicPolicy }); + } } // Update epistemic state before dispatch (orient resets tracker internally). // Invariant: only MCP tool calls (this path) feed panic. CLI commands (panic-check, @@ -1362,7 +1370,17 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { // no recursive panic feedback loop from openlore internal commands. if (tracker && directory) { updateTracker(tracker, name, directory, typeof filePath === 'string' ? 
filePath : undefined); - writePanicState(directory, trackerToPanicState(tracker, agentName)); + if (panicPolicy !== 'off') { + updatePanic(tracker, { + density: tracker.density, + oscillation: tracker.oscillation, + weight: 1, // weight read from TOOL_WEIGHTS inside updatePanic via opts — set baseline here + staleDepth: tracker.staleDepth, + directory, + tool: name, + }); + writePanicState(directory, trackerToPanicState(tracker, agentName)); + } } let result: unknown; @@ -1565,7 +1583,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { content.push({ type: 'text', text }); if (signal && !signal.prepend) content.push({ type: 'text', text: signal.text }); - if (tracker) { + if (tracker && (panicPolicy === 'advisory' || panicPolicy === 'experimental_blocking')) { const panicState = trackerToPanicState(tracker, agentName); const panicText = getPanicSignalText(panicState); if (panicText) { diff --git a/src/cli/commands/panic-check.ts b/src/cli/commands/panic-check.ts index abd2f565..eb17f89e 100644 --- a/src/cli/commands/panic-check.ts +++ b/src/cli/commands/panic-check.ts @@ -12,6 +12,7 @@ import { Command } from 'commander'; import { readPanicState, writePanicState, buildPanicCheckOutput } from '../../core/services/mcp-handlers/panic-response.js'; import { queryGryphSignals, applyGryphDelta } from '../../core/services/mcp-handlers/gryph-bridge.js'; +import { readOpenLoreConfig } from '../../core/services/config-manager.js'; import { emit } from '../../core/services/telemetry.js'; type HookFormat = 'claude' | 'kilo' | 'codex'; @@ -20,53 +21,75 @@ export const panicCheckCommand = new Command('panic-check') .description('Check current panic level (PreToolUse hook consumer)') .option('-d, --directory ', 'Project directory', process.cwd()) .option('-f, --format ', 'Hook format: claude|kilo|codex', 'claude') - .action((options: { directory: string; format: string }) => { - const dir = options.directory; - const format = options.format as 
HookFormat; - let state = readPanicState(dir); + .action(async (options: { directory: string; format: string }) => { + try { + const dir = options.directory; + const format = options.format as HookFormat; - // Gryph enrichment — fail-open, query from lastOrientAt (or 15min ago if absent) - const since = state.lastOrientAt ?? new Date(Date.now() - 15 * 60 * 1000).toISOString(); - const gryphSignals = queryGryphSignals(since); - if (gryphSignals) { - const enrichedTriggers = [...state.triggers]; - const enrichedScore = applyGryphDelta( - state.panicScore, - gryphSignals, - state.panicLevel >= 2, // isStale when at L2+ - enrichedTriggers, - ); - if (enrichedScore !== state.panicScore) { - state = { + // Policy gate — config is single source of truth + const cfg = await readOpenLoreConfig(dir); + const mode = cfg?.panicResponse?.mode ?? 'off'; + + if (mode === 'off' || mode === 'telemetry') { + // Panic disabled or telemetry-only: hook passes through silently + process.exit(0); + } + + let state = readPanicState(dir); + + // Gryph enrichment — fail-open, query from lastOrientAt (or 15min ago if absent) + const since = state.lastOrientAt ?? 
new Date(Date.now() - 15 * 60 * 1000).toISOString(); + const gryphSignals = queryGryphSignals(since); + if (gryphSignals) { + const enrichedTriggers = [...state.triggers]; + const enrichedScore = applyGryphDelta( + state.panicScore, + gryphSignals, + state.panicLevel >= 2, // isStale when at L2+ + enrichedTriggers, + ); + if (enrichedScore !== state.panicScore) { + state = { + ...state, + panicScore: enrichedScore, + triggers: enrichedTriggers, + }; + } + } + + const output = buildPanicCheckOutput(state); + + if (output.decision === 'warn') { + const newCount = state.interventionCountSinceStable + 1; + writePanicState(dir, { ...state, - panicScore: enrichedScore, - triggers: enrichedTriggers, - }; + lastHookInterventionAt: new Date().toISOString(), + interventionCountSinceStable: newCount, + }); + emit(dir, 'panic', { + event: 'hook_intervention', + channel: 'pre_tool_use', + format, + panic_level: state.panicLevel, + severity: output.severity, + directive_mode: newCount >= 3, + intervention_count: newCount, + gryph_enriched: gryphSignals !== null, + }); } - } - const output = buildPanicCheckOutput(state); + // experimental_blocking: emit block signal at L4 — runtime decides enforcement. + // OpenLore always exits 0. This is NOT the same as an advisory level. 
+ if (mode === 'experimental_blocking' && state.panicLevel >= 4) { + const blockOutput = { decision: 'block' as const, panicLevel: state.panicLevel, message: output.message }; + process.stdout.write(JSON.stringify(blockOutput) + '\n'); + process.exit(0); + } - if (output.decision === 'warn') { - const newCount = state.interventionCountSinceStable + 1; - writePanicState(dir, { - ...state, - lastHookInterventionAt: new Date().toISOString(), - interventionCountSinceStable: newCount, - }); - emit(dir, 'panic', { - event: 'hook_intervention', - channel: 'pre_tool_use', - format, - panic_level: state.panicLevel, - severity: output.severity, - directive_mode: newCount >= 3, - intervention_count: newCount, - gryph_enriched: gryphSignals !== null, - }); + process.stdout.write(formatOutput(output, format) + '\n'); + } catch { + // fail-open: any error → silent exit 0 } - - process.stdout.write(formatOutput(output, format) + '\n'); process.exit(0); }); diff --git a/src/cli/commands/setup.ts b/src/cli/commands/setup.ts index baaf0e64..08794541 100644 --- a/src/cli/commands/setup.ts +++ b/src/cli/commands/setup.ts @@ -23,6 +23,8 @@ import { fileURLToPath } from 'node:url'; import { checkbox } from '@inquirer/prompts'; import { logger } from '../../utils/logger.js'; import { installPreCommitHook, installClaudeHook } from './decisions.js'; +import { readOpenLoreConfig, writeOpenLoreConfig } from '../../core/services/config-manager.js'; +import type { PanicResponseMode } from '../../types/index.js'; // ============================================================================ // PANIC CHECK HOOK @@ -328,7 +330,11 @@ export const setupCommand = new Command('setup') '--hooks ', 'Install PreToolUse panic-check hook for the given agent format: claude|kilo|codex' ) - .action(async (options: { tools?: string; force: boolean; dir: string; hooks?: string }) => { + .option( + '--panic ', + 'Set panic response mode in .openlore/config.json: off|telemetry|advisory|experimental_blocking' + 
) + .action(async (options: { tools?: string; force: boolean; dir: string; hooks?: string; panic?: string }) => { const projectRoot = options.dir; const allTools: ToolName[] = ['vibe', 'cline', 'gsd', 'bmad', 'claude', 'opencode', 'omoa']; @@ -423,6 +429,24 @@ export const setupCommand = new Command('setup') await installPanicCheckHook(projectRoot, fmt); } + // --panic flag: update panicResponse.mode in .openlore/config.json + if (options.panic !== undefined) { + const validModes: PanicResponseMode[] = ['off', 'telemetry', 'advisory', 'experimental_blocking']; + if (!validModes.includes(options.panic as PanicResponseMode)) { + logger.error(`Unknown panic mode "${options.panic}". Valid: ${validModes.join(', ')}`); + } else { + const mode = options.panic as PanicResponseMode; + const cfg = await readOpenLoreConfig(projectRoot); + if (!cfg) { + logger.warning('No .openlore/config.json found — run openlore init first.'); + } else { + cfg.panicResponse = { mode }; + await writeOpenLoreConfig(projectRoot, cfg); + logger.success(`panic response mode set to "${mode}"`); + } + } + } + // ── Report ─────────────────────────────────────────────────────────────── const byTool: Record = {}; for (const r of results) { diff --git a/src/core/services/config-manager.ts b/src/core/services/config-manager.ts index a93c1973..b3d6998f 100644 --- a/src/core/services/config-manager.ts +++ b/src/core/services/config-manager.ts @@ -65,6 +65,7 @@ export function getDefaultConfig(projectType: ProjectType, openspecPath: string) model: DEFAULT_ANTHROPIC_MODEL, domains: 'auto', }, + panicResponse: { mode: 'off' }, createdAt: new Date().toISOString(), lastRun: null, }; diff --git a/src/core/services/mcp-handlers/epistemic-lease.test.ts b/src/core/services/mcp-handlers/epistemic-lease.test.ts index e214196a..e327f286 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.test.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.test.ts @@ -3,7 +3,7 @@ */ import { describe, it, expect, vi, 
beforeEach, afterEach } from 'vitest'; -import { createTracker, updateTracker, injectFreshness, getSourceRoots, trackerToPanicState } from './epistemic-lease.js'; +import { createTracker, updateTracker, updatePanic, injectFreshness, getSourceRoots, trackerToPanicState } from './epistemic-lease.js'; import type { EpistemicTracker } from './epistemic-lease.js'; // ============================================================================ @@ -622,6 +622,19 @@ describe('updateTracker — V3.1 cross-module trajectory', () => { }); }); +// ============================================================================ +// Panic helpers — policy is now external; tests must call updatePanic() explicitly +// after updateTracker() when they want to observe panic scoring behavior. +// ============================================================================ + +function callBoth(t: EpistemicTracker, tool: string, dir: string, filePath?: string): void { + updateTracker(t, tool, dir, filePath); + // orient is handled by resetTracker() internally; do not double-apply panic scoring. + if (tool !== 'orient') { + updatePanic(t, { density: t.density, oscillation: t.oscillation, weight: 1, staleDepth: t.staleDepth, directory: dir, tool }); + } +} + // ============================================================================ // Panic — score accumulation and level transitions // ============================================================================ @@ -638,7 +651,7 @@ describe('panic — score and level via updateTracker', () => { // Build A→B→A→B oscillation (bigram repetition) driving oscillation score up for (let i = 0; i < 15; i++) { const mod = i % 2 === 0 ? 
'auth' : 'billing'; - updateTracker(t, 'search_code', '/fake/repo', `src/${mod}/x.ts`); + callBoth(t, 'search_code', '/fake/repo', `src/${mod}/x.ts`); } expect(t.panicScore).toBeGreaterThan(0); }); @@ -650,7 +663,7 @@ describe('panic — score and level via updateTracker', () => { t.moduleAccessWindow = ['auth','billing','auth','billing','auth','billing','auth','billing', 'auth','billing','auth','billing','auth','billing','auth'] as (string|null)[]; t.lastModule = 'auth'; - updateTracker(t, 'trace_execution_path', '/fake/repo', 'src/billing/x.ts'); + callBoth(t, 'trace_execution_path', '/fake/repo', 'src/billing/x.ts'); expect(t.panicLevel).toBeGreaterThanOrEqual(1); }); @@ -660,7 +673,7 @@ describe('panic — score and level via updateTracker', () => { // Force stale at depth 3 t.freshnessState = 'stale'; t.staleDepth = 3; - updateTracker(t, 'list_spec_domains', '/fake/repo'); + callBoth(t, 'list_spec_domains', '/fake/repo'); // Panic ceiling: staleDepth≥3 → panicLevel ≥ 2 expect(t.panicLevel).toBeGreaterThanOrEqual(2); }); @@ -670,7 +683,7 @@ describe('panic — score and level via updateTracker', () => { t.panicLevel = 1; t.panicScore = 5; // below down-threshold for L1 (20) → drops to L0 t.interventionCountSinceStable = 5; - updateTracker(t, 'list_spec_domains', '/fake/repo'); + callBoth(t, 'list_spec_domains', '/fake/repo'); expect(t.panicLevel).toBe(0); expect(t.interventionCountSinceStable).toBe(0); }); @@ -757,7 +770,7 @@ describe('panic — individual signal detection', () => { ] as (string|null)[]; t.lastModule = 'e'; // density = 14 switches / 15 = 0.93 → trajectory_burst fires - updateTracker(t, 'search_code', '/fake/repo', 'src/f/x.ts'); + callBoth(t, 'search_code', '/fake/repo', 'src/f/x.ts'); // +15 trajectory_burst (oscillation may also add +10 if ≥0.50) expect(t.panicScore).toBeGreaterThanOrEqual(15); }); @@ -771,7 +784,7 @@ describe('panic — individual signal detection', () => { t.moduleAccessWindow = window; t.lastModule = 'billing'; // This call adds 
'auth', creating another A→B→A bigram → oscillation stays high - updateTracker(t, 'search_code', '/fake/repo', 'src/auth/x.ts'); + callBoth(t, 'search_code', '/fake/repo', 'src/auth/x.ts'); // oscillation_spike (+10) + trajectory_burst (+15) both fire expect(t.panicScore).toBeGreaterThanOrEqual(10); }); @@ -784,7 +797,7 @@ describe('panic — individual signal detection', () => { // Build low localityConfidence via high density + oscillation in window t.moduleAccessWindow = ['a','b','a','b','a','b','a','b','a','b','a','b','a','b','a'] as (string|null)[]; t.localityConfidence = 0.1; // already low from previous calls — gate should open - updateTracker(t, 'search_code', '/fake/repo', 'src/c/x.ts'); + callBoth(t, 'search_code', '/fake/repo', 'src/c/x.ts'); // trajectory_burst + oscillation_spike + stale_depth_3 all fire expect(t.panicScore).toBeGreaterThanOrEqual(25); }); @@ -797,7 +810,7 @@ describe('panic — individual signal detection', () => { // Empty window → density=0, oscillation=0 → localityConfidence=1.0 t.moduleAccessWindow = []; t.localityConfidence = 1.0; - updateTracker(t, 'search_code', '/fake/repo'); // no filePath → stays in same module + callBoth(t, 'search_code', '/fake/repo'); // no filePath → stays in same module // stale_depth_3 gate blocked; only decay/locality_recovery may apply // score should not increase (no upward signals fire at high localityConfidence) expect(t.panicScore).toBe(0); @@ -809,7 +822,7 @@ describe('panic — individual signal detection', () => { t.moduleAccessWindow = []; // empty → density=0, oscillation=0 t.localityConfidence = 1.0; t.staleDepth = 0; - updateTracker(t, 'search_code', '/fake/repo'); // no cross-module activity + callBoth(t, 'search_code', '/fake/repo'); // no cross-module activity // locality_recovery (-3) fires; panicScore should drop expect(t.panicScore).toBeLessThan(20); }); @@ -852,7 +865,7 @@ describe('panic — refractory period after orient()', () => { t.lastModule = 'a'; t.freshnessState = 'stale'; 
t.staleDepth = 3; - updateTracker(t, 'trace_execution_path', '/fake/repo', 'src/b/x.ts'); + callBoth(t, 'trace_execution_path', '/fake/repo', 'src/b/x.ts'); // Upward signals blocked by refractory — score should not increase above post-orient value // (may decrease from decay/locality_recovery, but not increase) @@ -874,7 +887,7 @@ describe('panic — refractory period after orient()', () => { t.moduleAccessWindow = ['a','b','a','b','a','b','a','b','a','b','a','b','a','b','a'] as (string|null)[]; t.lastModule = 'a'; t.localityConfidence = 0.0; - updateTracker(t, 'trace_execution_path', '/fake/repo', 'src/b/x.ts'); + callBoth(t, 'trace_execution_path', '/fake/repo', 'src/b/x.ts'); // Signals should now fire → score increases expect(t.panicScore).toBeGreaterThan(scoreAfterOrient); @@ -940,9 +953,11 @@ describe('panic — burst escalation gate', () => { const t = freshTracker(); t.freshnessState = 'stale'; t.staleDepth = 1; - t.localityConfidence = 0.1; // low confidence — drift - // trace_execution_path (weight=8) → burst condition met - updateTracker(t, 'trace_execution_path', '/fake/repo'); + // A→B→A→B oscillation → density + oscillation both high → localityConfidence computed < 0.5 + t.moduleAccessWindow = ['a','b','a','b','a','b','a','b','a','b','a','b','a','b','a'] as (string|null)[]; + t.lastModule = 'a'; + // trace_execution_path (weight=8) → burst condition met; localityConfidence computed from window + updateTracker(t, 'trace_execution_path', '/fake/repo', 'src/b/x.ts'); expect(t.staleDepth).toBe(3); }); }); diff --git a/src/core/services/mcp-handlers/epistemic-lease.ts b/src/core/services/mcp-handlers/epistemic-lease.ts index 69ff7921..cc5e5ab3 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.ts @@ -65,6 +65,8 @@ export interface EpistemicTracker { lastSwitchAt: number; /** V3.2: oscillation score — repeated bigram transitions / total transitions [0,1]. 
*/ oscillation: number; + /** V3.2: last computed cross-module density [0,1] — stored so callers can read after updateTracker(). */ + density: number; // Panic fields — behavioral destabilization tracking (separate from freshness) panicScore: number; panicLevel: PanicLevel; @@ -192,7 +194,7 @@ interface PanicProvenanceItem { evidence: Record; } -function updatePanic( +export function updatePanic( tracker: EpistemicTracker, opts: { density: number; oscillation: number; weight: number; staleDepth: number; directory?: string; tool?: string }, ): void { @@ -212,9 +214,9 @@ function updatePanic( provenance.push({ name: 'passive_decay', delta: decayDelta, evidence: { elapsed_min: Math.round(elapsedMin * 100) / 100 } }); } - // localityConfidence captures sustained coherent local work — both density and oscillation - // independently degrade it. Computed here (before signal gating) so it can modulate signals. - const localityConfidence = Math.max(0, (1 - Math.min(1, density * 2)) * (1 - Math.min(1, oscillation))); + // localityConfidence is computed in updateTracker() and stored in tracker. + // Read it here so signal gating uses the current value. 
+ const localityConfidence = tracker.localityConfidence; // Upward signals — suppressed during refractory period after orient() recovery if (!inRefractory) { @@ -247,7 +249,6 @@ function updatePanic( const scoreBefore = tracker.panicScore; tracker.lastPanicUpdateAt = now; tracker.panicScore = Math.min(PANIC_SCORE_MAX, Math.max(0, tracker.panicScore + delta)); - tracker.localityConfidence = localityConfidence; // Accumulate trigger names for the current episode (upward signals only) const upwardTriggers = provenance.filter(p => p.delta > 0).map(p => p.name); @@ -433,6 +434,7 @@ export function createTracker(directory: string): EpistemicTracker { lastDensityPenaltyAt: 0, lastSwitchAt: 0, oscillation: 0, + density: 0, panicScore: 0, panicLevel: 0, localityConfidence: 1, @@ -549,6 +551,10 @@ export function updateTracker( const density = computeCrossModuleDensity(tracker.moduleAccessWindow); const oscillation = computeOscillationScore(tracker.moduleAccessWindow); tracker.oscillation = oscillation; + tracker.density = density; + // localityConfidence is a navigation coherence metric — computed here so it's + // always current regardless of whether panic scoring is enabled. + tracker.localityConfidence = Math.max(0, (1 - Math.min(1, density * 2)) * (1 - Math.min(1, oscillation))); // Already stale — time-based depth escalation only, plus V3.2 burst sensitivity. // Load stops accumulating here; burst detection uses tool weight and density instead. 
@@ -565,7 +571,6 @@ export function updateTracker( locality_confidence: tracker.localityConfidence, }); tracker.staleDepth = 3; - updatePanic(tracker, { density, oscillation, weight, staleDepth: tracker.staleDepth, directory, tool: toolName }); return; } const newDepth = computeStaleDepth(tracker.cognitiveLoad, ageMs); @@ -577,7 +582,6 @@ export function updateTracker( }); tracker.staleDepth = newDepth as StaleDepth; } - updatePanic(tracker, { density, oscillation, weight, staleDepth: tracker.staleDepth, directory, tool: toolName }); return; } @@ -632,8 +636,6 @@ export function updateTracker( tracker.freshnessState = 'degraded'; emit(directory, 'epistemic-lease', { event: 'degraded', trigger, ...telCtx }); } - - updatePanic(tracker, { density, oscillation, weight, staleDepth: tracker.staleDepth, directory, tool: toolName }); } // ============================================================================ diff --git a/src/types/index.ts b/src/types/index.ts index 159d7a00..28e5f1d7 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -5,6 +5,9 @@ // Project detection types export type ProjectType = 'nodejs' | 'python' | 'rust' | 'go' | 'java' | 'ruby' | 'php' | 'unknown'; +// Panic response impact level +export type PanicResponseMode = 'off' | 'telemetry' | 'advisory' | 'experimental_blocking'; + // Configuration types export interface OpenLoreConfig { version: string; @@ -14,6 +17,14 @@ export interface OpenLoreConfig { generation: GenerationConfig; llm?: LLMConfig; embedding?: EmbeddingConfig; + panicResponse?: { + /** + * Controls panic scoring and intervention only — freshness tracking (density, + * oscillation, staleDepth, localityConfidence) is always computed regardless. + * Default: 'off'. 
+ */ + mode: PanicResponseMode; + }; createdAt: string; lastRun: string | null; } From 90c422ec95a84c3cd3934d833fae965408a8102d Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Thu, 21 May 2026 21:46:31 +0200 Subject: [PATCH 10/22] fix(telemetry): remove unused toolCall helper in computeRecovery describe block --- src/cli/commands/telemetry.test.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/cli/commands/telemetry.test.ts b/src/cli/commands/telemetry.test.ts index b912fe93..74aa7535 100644 --- a/src/cli/commands/telemetry.test.ts +++ b/src/cli/commands/telemetry.test.ts @@ -192,9 +192,6 @@ describe('computeRecovery', () => { function orientCall(offsetMs: number): McpEvent { return { ts: ts(offsetMs), event: 'tool_call', tool: 'orient', ms: 50 }; } - function toolCall(name: string, offsetMs: number): McpEvent { - return { ts: ts(offsetMs), event: 'tool_call', tool: name, ms: 20 }; - } function degraded(offsetMs: number): LeaseEvent { return { ts: ts(offsetMs), event: 'degraded' }; } From 6eba94ae5a9036b2d8325b74589d012d54bed941 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Thu, 21 May 2026 21:50:33 +0200 Subject: [PATCH 11/22] =?UTF-8?q?feat(panic):=20add=20privacy=20mode=20?= =?UTF-8?q?=E2=80=94=20zero=20instrumentation=20layer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separates instrumentation from policy. 'off' already disabled persistence and intervention; 'privacy' cuts deeper — skips updateTracker() entirely so no behavioral profiling occurs in memory. 
Mode ladder (bottom to top): privacy → no instrumentation, no persistence, no intervention off (default) → in-memory tracking only telemetry → tracking + persistence advisory → tracking + persistence + response injection experimental_blocking → + block signal to hook runtime --- src/cli/commands/mcp.ts | 4 ++-- src/cli/commands/panic-check.ts | 2 +- src/types/index.ts | 14 ++++++++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts index 5485ee85..375d22dc 100644 --- a/src/cli/commands/mcp.ts +++ b/src/cli/commands/mcp.ts @@ -1360,7 +1360,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { trackerDir = directory; const cfg = await readOpenLoreConfig(directory); panicPolicy = cfg?.panicResponse?.mode ?? 'off'; - if (panicPolicy !== 'off') { + if (panicPolicy !== 'off' && panicPolicy !== 'privacy') { emit(directory, 'panic-response', { event: 'panic_mode_active', mode: panicPolicy }); } } @@ -1368,7 +1368,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { // Invariant: only MCP tool calls (this path) feed panic. CLI commands (panic-check, // telemetry) are separate processes that read state but never call updateTracker — // no recursive panic feedback loop from openlore internal commands. - if (tracker && directory) { + if (tracker && directory && panicPolicy !== 'privacy') { updateTracker(tracker, name, directory, typeof filePath === 'string' ? filePath : undefined); if (panicPolicy !== 'off') { updatePanic(tracker, { diff --git a/src/cli/commands/panic-check.ts b/src/cli/commands/panic-check.ts index eb17f89e..31523c40 100644 --- a/src/cli/commands/panic-check.ts +++ b/src/cli/commands/panic-check.ts @@ -30,7 +30,7 @@ export const panicCheckCommand = new Command('panic-check') const cfg = await readOpenLoreConfig(dir); const mode = cfg?.panicResponse?.mode ?? 
'off'; - if (mode === 'off' || mode === 'telemetry') { + if (mode === 'privacy' || mode === 'off' || mode === 'telemetry') { // Panic disabled or telemetry-only: hook passes through silently process.exit(0); } diff --git a/src/types/index.ts b/src/types/index.ts index 28e5f1d7..981350c6 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -6,7 +6,11 @@ export type ProjectType = 'nodejs' | 'python' | 'rust' | 'go' | 'java' | 'ruby' | 'php' | 'unknown'; // Panic response impact level -export type PanicResponseMode = 'off' | 'telemetry' | 'advisory' | 'experimental_blocking'; +// privacy: no instrumentation — updateTracker() skipped entirely, zero behavioral profiling +// off: instrumentation runs in-memory, no persistence, no intervention +// telemetry: instrumentation + persistence, no intervention +// advisory / experimental_blocking: full pipeline with response injection / block signal +export type PanicResponseMode = 'privacy' | 'off' | 'telemetry' | 'advisory' | 'experimental_blocking'; // Configuration types export interface OpenLoreConfig { @@ -19,9 +23,11 @@ export interface OpenLoreConfig { embedding?: EmbeddingConfig; panicResponse?: { /** - * Controls panic scoring and intervention only — freshness tracking (density, - * oscillation, staleDepth, localityConfidence) is always computed regardless. - * Default: 'off'. + * Controls instrumentation, scoring, and intervention. + * 'privacy': skips updateTracker() entirely — zero behavioral profiling. + * 'off': in-memory tracking only, no persistence or intervention (default). + * 'telemetry': tracking + persistence, no intervention. + * 'advisory' / 'experimental_blocking': full pipeline. 
*/ mode: PanicResponseMode; }; From 2a127faa1dc3666a23edde9cef340ddf80964107 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Thu, 21 May 2026 21:56:16 +0200 Subject: [PATCH 12/22] =?UTF-8?q?refactor(panic):=20collapse=20privacy?= =?UTF-8?q?=E2=86=92off,=20simplify=20mode=20ladder?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'off' now means zero instrumentation (updateTracker skipped). Removed the intermediate 'off' state that ran tracking without consumers — it was computationally dead. Inner updatePanic gate drops; anything past 'off' runs the full pipeline. Final ladder: off | telemetry | advisory | experimental_blocking --- src/cli/commands/mcp.ts | 24 +++++++++++------------- src/cli/commands/panic-check.ts | 2 +- src/types/index.ts | 8 +++----- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts index 375d22dc..1c2285f5 100644 --- a/src/cli/commands/mcp.ts +++ b/src/cli/commands/mcp.ts @@ -1360,7 +1360,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { trackerDir = directory; const cfg = await readOpenLoreConfig(directory); panicPolicy = cfg?.panicResponse?.mode ?? 'off'; - if (panicPolicy !== 'off' && panicPolicy !== 'privacy') { + if (panicPolicy !== 'off') { emit(directory, 'panic-response', { event: 'panic_mode_active', mode: panicPolicy }); } } @@ -1368,19 +1368,17 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { // Invariant: only MCP tool calls (this path) feed panic. CLI commands (panic-check, // telemetry) are separate processes that read state but never call updateTracker — // no recursive panic feedback loop from openlore internal commands. - if (tracker && directory && panicPolicy !== 'privacy') { + if (tracker && directory && panicPolicy !== 'off') { updateTracker(tracker, name, directory, typeof filePath === 'string' ? 
filePath : undefined); - if (panicPolicy !== 'off') { - updatePanic(tracker, { - density: tracker.density, - oscillation: tracker.oscillation, - weight: 1, // weight read from TOOL_WEIGHTS inside updatePanic via opts — set baseline here - staleDepth: tracker.staleDepth, - directory, - tool: name, - }); - writePanicState(directory, trackerToPanicState(tracker, agentName)); - } + updatePanic(tracker, { + density: tracker.density, + oscillation: tracker.oscillation, + weight: 1, // weight read from TOOL_WEIGHTS inside updatePanic via opts — set baseline here + staleDepth: tracker.staleDepth, + directory, + tool: name, + }); + writePanicState(directory, trackerToPanicState(tracker, agentName)); } let result: unknown; diff --git a/src/cli/commands/panic-check.ts b/src/cli/commands/panic-check.ts index 31523c40..eb17f89e 100644 --- a/src/cli/commands/panic-check.ts +++ b/src/cli/commands/panic-check.ts @@ -30,7 +30,7 @@ export const panicCheckCommand = new Command('panic-check') const cfg = await readOpenLoreConfig(dir); const mode = cfg?.panicResponse?.mode ?? 
'off'; - if (mode === 'privacy' || mode === 'off' || mode === 'telemetry') { + if (mode === 'off' || mode === 'telemetry') { // Panic disabled or telemetry-only: hook passes through silently process.exit(0); } diff --git a/src/types/index.ts b/src/types/index.ts index 981350c6..012af950 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -6,11 +6,10 @@ export type ProjectType = 'nodejs' | 'python' | 'rust' | 'go' | 'java' | 'ruby' | 'php' | 'unknown'; // Panic response impact level -// privacy: no instrumentation — updateTracker() skipped entirely, zero behavioral profiling -// off: instrumentation runs in-memory, no persistence, no intervention +// off: no instrumentation — updateTracker() skipped, zero behavioral profiling (default) // telemetry: instrumentation + persistence, no intervention // advisory / experimental_blocking: full pipeline with response injection / block signal -export type PanicResponseMode = 'privacy' | 'off' | 'telemetry' | 'advisory' | 'experimental_blocking'; +export type PanicResponseMode = 'off' | 'telemetry' | 'advisory' | 'experimental_blocking'; // Configuration types export interface OpenLoreConfig { @@ -24,8 +23,7 @@ export interface OpenLoreConfig { panicResponse?: { /** * Controls instrumentation, scoring, and intervention. - * 'privacy': skips updateTracker() entirely — zero behavioral profiling. - * 'off': in-memory tracking only, no persistence or intervention (default). + * 'off': skips updateTracker() entirely — zero behavioral profiling (default). * 'telemetry': tracking + persistence, no intervention. * 'advisory' / 'experimental_blocking': full pipeline. 
*/ From fd1f1d5550d1bf71b4563d44d8a42aee749523f0 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Fri, 22 May 2026 21:09:35 +0200 Subject: [PATCH 13/22] =?UTF-8?q?refactor(panic):=20always=20run=20updateTracker,?= =?UTF-8?q?=20rename=20telemetry=E2=86=92observe?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restores updateTracker() to always run — freshness/epistemic tracking is not panic instrumentation. 'off' disables the panic subsystem only; the epistemic engine remains active regardless of panic mode. Renames 'telemetry' → 'observe': the mode observes the panic engine without intervening, rather than merely moving telemetry data. Final ladder: off | observe | advisory | experimental_blocking --- src/cli/commands/mcp.ts | 22 ++++++++++++---------- src/cli/commands/panic-check.ts | 4 ++-- src/cli/commands/setup.ts | 4 ++-- src/types/index.ts | 14 +++++++------- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts index 1c2285f5..5485ee85 100644 --- a/src/cli/commands/mcp.ts +++ b/src/cli/commands/mcp.ts @@ -1368,17 +1368,19 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { // Invariant: only MCP tool calls (this path) feed panic. CLI commands (panic-check, // telemetry) are separate processes that read state but never call updateTracker — // no recursive panic feedback loop from openlore internal commands. - if (tracker && directory && panicPolicy !== 'off') { + if (tracker && directory) { updateTracker(tracker, name, directory, typeof filePath === 'string' ?
filePath : undefined); - updatePanic(tracker, { - density: tracker.density, - oscillation: tracker.oscillation, - weight: 1, // weight read from TOOL_WEIGHTS inside updatePanic via opts — set baseline here - staleDepth: tracker.staleDepth, - directory, - tool: name, - }); - writePanicState(directory, trackerToPanicState(tracker, agentName)); + if (panicPolicy !== 'off') { + updatePanic(tracker, { + density: tracker.density, + oscillation: tracker.oscillation, + weight: 1, // weight read from TOOL_WEIGHTS inside updatePanic via opts — set baseline here + staleDepth: tracker.staleDepth, + directory, + tool: name, + }); + writePanicState(directory, trackerToPanicState(tracker, agentName)); + } } let result: unknown; diff --git a/src/cli/commands/panic-check.ts b/src/cli/commands/panic-check.ts index eb17f89e..e5803bdd 100644 --- a/src/cli/commands/panic-check.ts +++ b/src/cli/commands/panic-check.ts @@ -30,8 +30,8 @@ export const panicCheckCommand = new Command('panic-check') const cfg = await readOpenLoreConfig(dir); const mode = cfg?.panicResponse?.mode ?? 
'off'; - if (mode === 'off' || mode === 'telemetry') { - // Panic disabled or telemetry-only: hook passes through silently + if (mode === 'off' || mode === 'observe') { + // Panic disabled or observe-only: hook passes through silently process.exit(0); } diff --git a/src/cli/commands/setup.ts b/src/cli/commands/setup.ts index 08794541..08429ae2 100644 --- a/src/cli/commands/setup.ts +++ b/src/cli/commands/setup.ts @@ -332,7 +332,7 @@ export const setupCommand = new Command('setup') ) .option( '--panic ', - 'Set panic response mode in .openlore/config.json: off|telemetry|advisory|experimental_blocking' + 'Set panic response mode in .openlore/config.json: off|observe|advisory|experimental_blocking' ) .action(async (options: { tools?: string; force: boolean; dir: string; hooks?: string; panic?: string }) => { const projectRoot = options.dir; @@ -431,7 +431,7 @@ export const setupCommand = new Command('setup') // --panic flag: update panicResponse.mode in .openlore/config.json if (options.panic !== undefined) { - const validModes: PanicResponseMode[] = ['off', 'telemetry', 'advisory', 'experimental_blocking']; + const validModes: PanicResponseMode[] = ['off', 'observe', 'advisory', 'experimental_blocking']; if (!validModes.includes(options.panic as PanicResponseMode)) { logger.error(`Unknown panic mode "${options.panic}". Valid: ${validModes.join(', ')}`); } else { diff --git a/src/types/index.ts b/src/types/index.ts index 012af950..f8bf6938 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -6,10 +6,10 @@ export type ProjectType = 'nodejs' | 'python' | 'rust' | 'go' | 'java' | 'ruby' | 'php' | 'unknown'; // Panic response impact level -// off: no instrumentation — updateTracker() skipped, zero behavioral profiling (default) -// telemetry: instrumentation + persistence, no intervention +// off: panic subsystem disabled. Freshness/epistemic tracking always runs regardless. 
(default) +// observe: panic scoring + state file, no intervention — observe the engine without acting // advisory / experimental_blocking: full pipeline with response injection / block signal -export type PanicResponseMode = 'off' | 'telemetry' | 'advisory' | 'experimental_blocking'; +export type PanicResponseMode = 'off' | 'observe' | 'advisory' | 'experimental_blocking'; // Configuration types export interface OpenLoreConfig { @@ -22,10 +22,10 @@ export interface OpenLoreConfig { embedding?: EmbeddingConfig; panicResponse?: { /** - * Controls instrumentation, scoring, and intervention. - * 'off': skips updateTracker() entirely — zero behavioral profiling (default). - * 'telemetry': tracking + persistence, no intervention. - * 'advisory' / 'experimental_blocking': full pipeline. + * Controls panic scoring and intervention. Freshness/epistemic tracking always runs. + * 'off': panic subsystem disabled entirely (default). + * 'observe': panic scoring + state written, no intervention. + * 'advisory' / 'experimental_blocking': full pipeline with response injection. */ mode: PanicResponseMode; }; From 7eeb612a8d456a10ff8cc6d6e538f99bef8683ee Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Fri, 22 May 2026 21:22:03 +0200 Subject: [PATCH 14/22] docs(panic): clarify localityConfidence role and refractory replace semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit localityConfidence is shared behavioral state (freshness + panic), not purely freshness state. Refractory deadline is replaced on each orient() recovery, not extended — document both explicitly in-source. 
--- src/core/services/mcp-handlers/epistemic-lease.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core/services/mcp-handlers/epistemic-lease.ts b/src/core/services/mcp-handlers/epistemic-lease.ts index cc5e5ab3..792b6ea5 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.ts @@ -479,6 +479,8 @@ function resetTracker(tracker: EpistemicTracker, directory: string): void { } // Set refractory window when orient() achieves actual score reduction. // Suppresses upward signals for 45s to let recovery land before re-escalating. + // Subsequent orient() calls during an active refractory replace the deadline + // (not extend): the window always starts fresh from the most recent recovery. if (panicDelta < 0) { tracker.panicRecoverySuppressionUntil = now + PANIC_REFRACTORY_MS; } @@ -552,7 +554,8 @@ export function updateTracker( const oscillation = computeOscillationScore(tracker.moduleAccessWindow); tracker.oscillation = oscillation; tracker.density = density; - // localityConfidence is a navigation coherence metric — computed here so it's + // localityConfidence is shared behavioral state: used by freshness (burst gate) + // and panic (stale_depth_3 gate, burst escalation gate). Computed here so it's // always current regardless of whether panic scoring is enabled. 
tracker.localityConfidence = Math.max(0, (1 - Math.min(1, density * 2)) * (1 - Math.min(1, oscillation))); From c1412714aadc8d2837b5ed928f6788e33c58cb28 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Fri, 22 May 2026 21:26:13 +0200 Subject: [PATCH 15/22] docs(panic): harden contracts on localityConfidence, off semantics, block advisory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - localityConfidence interface gets explicit WARNING: affects both freshness and panic — blast-radius comment locks the shared contract - experimental_blocking payload adds advisory:true — protocol now matches the documented "runtime decides" semantics; no implicit authority - OpenLoreConfig docstring enumerates exactly what 'off' disables vs. what continues (freshness metrics) — eliminates the apparent contradiction --- src/cli/commands/panic-check.ts | 5 +++-- src/core/services/mcp-handlers/epistemic-lease.ts | 5 +++++ src/types/index.ts | 14 ++++++++++---- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/cli/commands/panic-check.ts b/src/cli/commands/panic-check.ts index e5803bdd..8e707eb3 100644 --- a/src/cli/commands/panic-check.ts +++ b/src/cli/commands/panic-check.ts @@ -79,9 +79,10 @@ export const panicCheckCommand = new Command('panic-check') } // experimental_blocking: emit block signal at L4 — runtime decides enforcement. - // OpenLore always exits 0. This is NOT the same as an advisory level. + // advisory:true is explicit in the payload: OpenLore recommends, never mandates. + // OpenLore always exits 0. 
if (mode === 'experimental_blocking' && state.panicLevel >= 4) { - const blockOutput = { decision: 'block' as const, panicLevel: state.panicLevel, message: output.message }; + const blockOutput = { decision: 'block' as const, advisory: true, panicLevel: state.panicLevel, message: output.message }; process.stdout.write(JSON.stringify(blockOutput) + '\n'); process.exit(0); } diff --git a/src/core/services/mcp-handlers/epistemic-lease.ts b/src/core/services/mcp-handlers/epistemic-lease.ts index 792b6ea5..466ac50b 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.ts @@ -70,6 +70,11 @@ export interface EpistemicTracker { // Panic fields — behavioral destabilization tracking (separate from freshness) panicScore: number; panicLevel: PanicLevel; + /** + * Shared behavioral coherence metric [0,1]. + * Used by: freshness burst gating AND panic escalation gating (stale_depth_3, burst). + * WARNING: changes affect both systems. Modify with full blast-radius awareness. + */ localityConfidence: number; recentOrientCount: number; lastOrientResetAt: number; diff --git a/src/types/index.ts b/src/types/index.ts index f8bf6938..177b802a 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -22,10 +22,16 @@ export interface OpenLoreConfig { embedding?: EmbeddingConfig; panicResponse?: { /** - * Controls panic scoring and intervention. Freshness/epistemic tracking always runs. - * 'off': panic subsystem disabled entirely (default). - * 'observe': panic scoring + state written, no intervention. - * 'advisory' / 'experimental_blocking': full pipeline with response injection. + * Controls the panic response subsystem. Default: 'off'. + * + * 'off' disables: panic scoring, panic state persistence, panic interventions, + * panic telemetry, panic hook output. + * Behavioral metrics required by the freshness engine (density, oscillation, + * localityConfidence) continue to be computed in-memory as part of EpistemicLease. 
+ * 'observe': panic scoring + state written, no intervention (collect only). + * 'advisory': full pipeline with L2+ response injection. + * 'experimental_blocking': advisory + runtime-mediated block signal at L4. + * advisory:true is always present in the payload — runtime decides enforcement. */ mode: PanicResponseMode; }; From 27633da32048e4e978bb80529df36cffd22d1fae Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Fri, 22 May 2026 21:37:18 +0200 Subject: [PATCH 16/22] refactor(panic): centralize constants in panic-constants.ts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single source of truth for all numeric thresholds, weights, cooldowns, and timing values. Both panic-response.ts and epistemic-lease.ts now import from it. Tests reference constants directly — no more behavioral snapshot drift when a threshold changes. New exports: PANIC_UP/DOWN_THRESHOLD, PANIC_TRAJECTORY_DENSITY/DELTA, PANIC_OSCILLATION_THRESHOLD/DELTA, PANIC_STALE_D3_LOCALITY_GATE/DELTA, PANIC_LOCALITY_RECOVERY, PANIC_DECAY_PER_MIN, PANIC_REFRACTORY_MS, PANIC_SESSION_EXPIRY_MS, HOOK_COOLDOWN_MS, SEVERITY_MAP. 
--- .../mcp-handlers/epistemic-lease.test.ts | 12 +++ .../services/mcp-handlers/epistemic-lease.ts | 30 +++--- .../services/mcp-handlers/panic-constants.ts | 95 +++++++++++++++++++ .../mcp-handlers/panic-response.test.ts | 49 +++++----- .../services/mcp-handlers/panic-response.ts | 21 ++-- 5 files changed, 160 insertions(+), 47 deletions(-) create mode 100644 src/core/services/mcp-handlers/panic-constants.ts diff --git a/src/core/services/mcp-handlers/epistemic-lease.test.ts b/src/core/services/mcp-handlers/epistemic-lease.test.ts index e327f286..a9b7919f 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.test.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.test.ts @@ -5,6 +5,18 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { createTracker, updateTracker, updatePanic, injectFreshness, getSourceRoots, trackerToPanicState } from './epistemic-lease.js'; import type { EpistemicTracker } from './epistemic-lease.js'; +import { + PANIC_TRAJECTORY_DENSITY, + PANIC_TRAJECTORY_DELTA, + PANIC_OSCILLATION_THRESHOLD, + PANIC_OSCILLATION_DELTA, + PANIC_STALE_D3_LOCALITY_GATE, + PANIC_STALE_D3_DELTA, + PANIC_REFRACTORY_MS, + PANIC_UP_THRESHOLD, + PANIC_DOWN_THRESHOLD, + HOOK_COOLDOWN_MS, +} from './panic-constants.js'; // ============================================================================ // Mock git hash — default returns stable hash diff --git a/src/core/services/mcp-handlers/epistemic-lease.ts b/src/core/services/mcp-handlers/epistemic-lease.ts index 466ac50b..55bb94bf 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.ts @@ -36,6 +36,18 @@ import { import { emit } from '../telemetry.js'; import { applyPanicHysteresis } from './panic-response.js'; import type { PanicLevel, PanicState } from './panic-response.js'; +import { + PANIC_SCORE_MAX, + PANIC_TRAJECTORY_DENSITY, + PANIC_TRAJECTORY_DELTA, + PANIC_OSCILLATION_THRESHOLD, + 
PANIC_OSCILLATION_DELTA, + PANIC_STALE_D3_LOCALITY_GATE, + PANIC_STALE_D3_DELTA, + PANIC_LOCALITY_RECOVERY, + PANIC_DECAY_PER_MIN, + PANIC_REFRACTORY_MS, +} from './panic-constants.js'; // ============================================================================ // TYPES @@ -179,13 +191,7 @@ const BURST_TOOL_WEIGHT_THRESHOLD = 8; // tool weight for post-stale burst // Panic constants const RAPID_ORIENT_INTERVAL_MS = 2 * 60 * 1000; // orients within 2min are "rapid" -const PANIC_SCORE_MAX = 100; -// Spec-correct panic signal thresholds -const PANIC_TRAJECTORY_DENSITY = 0.60; // trajectory burst → +15 -const PANIC_OSCILLATION_THRESHOLD = 0.50; // oscillation spike → +10 -const PANIC_DECAY_PER_MIN = 5; // passive wall-clock decay -const PANIC_LOCALITY_RECOVERY = 3; // per-call recovery when stable -const PANIC_REFRACTORY_MS = 45_000; // post-orient suppression window (45s) +// Panic signal thresholds and weights imported from panic-constants.ts // ============================================================================ // PANIC UPDATE @@ -226,19 +232,19 @@ export function updatePanic( // Upward signals — suppressed during refractory period after orient() recovery if (!inRefractory) { if (density >= PANIC_TRAJECTORY_DENSITY) { - const d = 15; + const d = PANIC_TRAJECTORY_DELTA; delta += d; provenance.push({ name: 'trajectory_burst', delta: d, evidence: { density } }); } if (oscillation >= PANIC_OSCILLATION_THRESHOLD) { - const d = 10; + const d = PANIC_OSCILLATION_DELTA; delta += d; provenance.push({ name: 'oscillation_spike', delta: d, evidence: { oscillation } }); } // stale_depth_3 signal gated by localityConfidence: a stale agent doing focused local // work (high confidence) is much less risky than a stale agent in behavioral drift. 
- if (staleDepth >= 3 && localityConfidence < 0.5) { - const d = 25; + if (staleDepth >= 3 && localityConfidence < PANIC_STALE_D3_LOCALITY_GATE) { + const d = PANIC_STALE_D3_DELTA; delta += d; provenance.push({ name: 'stale_depth_3', delta: d, evidence: { stale_depth: staleDepth, locality_confidence: localityConfidence } }); } @@ -571,7 +577,7 @@ export function updateTracker( // Gated by localityConfidence: a stale agent doing focused local work is not bursting. // High confidence (≥0.5) suppresses burst escalation — only clear behavioral drift triggers it. const isBurst = weight >= BURST_TOOL_WEIGHT_THRESHOLD || density >= BURST_DENSITY_THRESHOLD; - if (tracker.staleDepth < 3 && isBurst && tracker.localityConfidence < 0.5) { + if (tracker.staleDepth < 3 && isBurst && tracker.localityConfidence < PANIC_STALE_D3_LOCALITY_GATE) { emit(directory, 'epistemic-lease', { event: 'depth_escalate', from_depth: tracker.staleDepth, to_depth: 3, tool: toolName, module: mod, cognitive_load: tracker.cognitiveLoad, diff --git a/src/core/services/mcp-handlers/panic-constants.ts b/src/core/services/mcp-handlers/panic-constants.ts new file mode 100644 index 00000000..7c25d688 --- /dev/null +++ b/src/core/services/mcp-handlers/panic-constants.ts @@ -0,0 +1,95 @@ +/** + * Panic Response Layer — centralized constants. + * + * Single source of truth for all numeric thresholds, weights, cooldowns, and + * timing values used across the panic subsystem (panic-response.ts, + * epistemic-lease.ts). Exported so tests can reference these values directly + * rather than hard-coding snapshots that silently diverge. + */ + +import type { PanicLevel } from './panic-response.js'; +import type { PanicCheckOutput } from './panic-response.js'; + +// ============================================================================ +// HYSTERESIS THRESHOLDS +// ============================================================================ + +/** Score required to transition upward from level N to N+1. 
*/ +export const PANIC_UP_THRESHOLD: Record = { + 0: 30, + 1: 50, + 2: 70, + 3: 90, +}; + +/** Score below which level N drops to N−1. Separate from UP to prevent thrashing. */ +export const PANIC_DOWN_THRESHOLD: Record = { + 1: 20, + 2: 40, + 3: 60, + 4: 80, +}; + +// ============================================================================ +// SIGNAL WEIGHTS +// ============================================================================ + +/** Trajectory burst signal: density ≥ threshold fires this delta. */ +export const PANIC_TRAJECTORY_DENSITY = 0.60; +export const PANIC_TRAJECTORY_DELTA = 15; + +/** Oscillation spike signal: oscillation ≥ threshold fires this delta. */ +export const PANIC_OSCILLATION_THRESHOLD = 0.50; +export const PANIC_OSCILLATION_DELTA = 10; + +/** Stale-depth-3 persistence signal (gated by localityConfidence < threshold). */ +export const PANIC_STALE_D3_LOCALITY_GATE = 0.5; +export const PANIC_STALE_D3_DELTA = 25; + +/** Locality recovery: per-call score reduction when agent is stable. */ +export const PANIC_LOCALITY_RECOVERY = 3; + +/** Passive wall-clock decay: score reduction per elapsed minute. */ +export const PANIC_DECAY_PER_MIN = 5; + +/** Hard ceiling on panic score. */ +export const PANIC_SCORE_MAX = 100; + +// ============================================================================ +// TIMING +// ============================================================================ + +/** Post-orient() refractory window — upward signals suppressed for this long. */ +export const PANIC_REFRACTORY_MS = 45_000; + +/** Session expiry — panic state older than this is discarded on read. */ +export const PANIC_SESSION_EXPIRY_MS = 30 * 60 * 1000; + +// ============================================================================ +// HOOK COOLDOWNS +// ============================================================================ + +/** + * Minimum ms between hook interventions per panic level. 
+ * Prevents context saturation and habituation from repeated injection. + * L4 = 0: every tool call warned at critical level. + */ +export const HOOK_COOLDOWN_MS: Record = { + 0: 0, + 1: 120_000, + 2: 60_000, + 3: 30_000, + 4: 0, +}; + +// ============================================================================ +// SEVERITY MAP +// ============================================================================ + +export const SEVERITY_MAP: Record = { + 0: undefined, + 1: 'elevated', + 2: 'panic', + 3: 'scope', + 4: 'critical', +}; diff --git a/src/core/services/mcp-handlers/panic-response.test.ts b/src/core/services/mcp-handlers/panic-response.test.ts index 5005540c..bbdbe633 100644 --- a/src/core/services/mcp-handlers/panic-response.test.ts +++ b/src/core/services/mcp-handlers/panic-response.test.ts @@ -19,6 +19,12 @@ import { getPanicSignalText, } from './panic-response.js'; import type { PanicState, PanicLevel } from './panic-response.js'; +import { + PANIC_UP_THRESHOLD, + PANIC_DOWN_THRESHOLD, + HOOK_COOLDOWN_MS, + PANIC_SESSION_EXPIRY_MS, +} from './panic-constants.js'; import { OPENLORE_DIR } from '../../../constants.js'; // ============================================================================ @@ -27,41 +33,40 @@ import { OPENLORE_DIR } from '../../../constants.js'; describe('applyPanicHysteresis', () => { it('stays 0 below up-threshold', () => { - expect(applyPanicHysteresis(0, 29, 0)).toBe(0); + expect(applyPanicHysteresis(0, PANIC_UP_THRESHOLD[0] - 1, 0)).toBe(0); }); - it('transitions 0→1 at score 30', () => { - expect(applyPanicHysteresis(0, 30, 0)).toBe(1); + it('transitions 0→1 at up-threshold', () => { + expect(applyPanicHysteresis(0, PANIC_UP_THRESHOLD[0], 0)).toBe(1); }); - it('transitions 1→2 at score 50', () => { - expect(applyPanicHysteresis(1, 50, 0)).toBe(2); + it('transitions 1→2 at up-threshold', () => { + expect(applyPanicHysteresis(1, PANIC_UP_THRESHOLD[1], 0)).toBe(2); }); - it('transitions 2→3 at score 70', () => { - 
expect(applyPanicHysteresis(2, 70, 0)).toBe(3); + it('transitions 2→3 at up-threshold', () => { + expect(applyPanicHysteresis(2, PANIC_UP_THRESHOLD[2], 0)).toBe(3); }); it('L3→L4 requires staleDepth ≥ 3', () => { - expect(applyPanicHysteresis(3, 90, 2)).toBe(3); // score meets threshold but stale too low - expect(applyPanicHysteresis(3, 90, 3)).toBe(4); + expect(applyPanicHysteresis(3, PANIC_UP_THRESHOLD[3], 2)).toBe(3); + expect(applyPanicHysteresis(3, PANIC_UP_THRESHOLD[3], 3)).toBe(4); }); it('does not downgrade when score above down-threshold', () => { - expect(applyPanicHysteresis(2, 41, 0)).toBe(2); // down-threshold for L2 is 40 + expect(applyPanicHysteresis(2, PANIC_DOWN_THRESHOLD[2] + 1, 0)).toBe(2); }); - it('downgrade 2→1 when score below 40', () => { - expect(applyPanicHysteresis(2, 39, 0)).toBe(1); + it('downgrade 2→1 when score below down-threshold', () => { + expect(applyPanicHysteresis(2, PANIC_DOWN_THRESHOLD[2] - 1, 0)).toBe(1); }); - it('downgrade 3→2 when score below 60', () => { - expect(applyPanicHysteresis(3, 59, 0)).toBe(2); + it('downgrade 3→2 when score below down-threshold', () => { + expect(applyPanicHysteresis(3, PANIC_DOWN_THRESHOLD[3] - 1, 0)).toBe(2); }); it('no simultaneous up and down transition', () => { - // score 30 → up to 1; no further down in same call - expect(applyPanicHysteresis(0, 30, 0)).toBe(1); + expect(applyPanicHysteresis(0, PANIC_UP_THRESHOLD[0], 0)).toBe(1); }); it('panic ceiling: staleDepth ≥ 3 floors minimum at L2', () => { @@ -114,8 +119,8 @@ describe('readPanicState', () => { expect(state.panicLevel).toBe(0); }); - it('returns defaultPanicState when session expired (>30min)', async () => { - const old = new Date(Date.now() - 31 * 60 * 1000).toISOString(); + it('returns defaultPanicState when session expired', async () => { + const old = new Date(Date.now() - PANIC_SESSION_EXPIRY_MS - 60_000).toISOString(); const expired: PanicState = { ...defaultPanicState(), panicScore: 80, panicLevel: 3, updatedAt: old, 
lastOrientAt: old }; await writeFile(join(dir, OPENLORE_DIR, 'panic-state.json'), JSON.stringify(expired), 'utf-8'); const state = readPanicState(dir); @@ -156,8 +161,8 @@ describe('buildPanicCheckOutput', () => { expect(out.message).toContain('[PANIC:ELEVATED]'); }); - it('returns allow when within L1 cooldown (120s)', () => { - const recentIntervention = new Date(Date.now() - 60_000).toISOString(); // 60s ago < 120s cooldown + it('returns allow when within L1 cooldown', () => { + const recentIntervention = new Date(Date.now() - HOOK_COOLDOWN_MS[1] / 2).toISOString(); const state: PanicState = { ...defaultPanicState(), panicLevel: 1, @@ -167,8 +172,8 @@ describe('buildPanicCheckOutput', () => { expect(out.decision).toBe('allow'); }); - it('returns warn when L1 cooldown expired (>120s)', () => { - const oldIntervention = new Date(Date.now() - 130_000).toISOString(); + it('returns warn when L1 cooldown expired', () => { + const oldIntervention = new Date(Date.now() - HOOK_COOLDOWN_MS[1] - 10_000).toISOString(); const state: PanicState = { ...defaultPanicState(), panicLevel: 1, diff --git a/src/core/services/mcp-handlers/panic-response.ts b/src/core/services/mcp-handlers/panic-response.ts index 91ccd893..e65fd812 100644 --- a/src/core/services/mcp-handlers/panic-response.ts +++ b/src/core/services/mcp-handlers/panic-response.ts @@ -12,6 +12,13 @@ import { writeFileSync, renameSync, readFileSync, existsSync } from 'node:fs'; import { join } from 'node:path'; import { OPENLORE_DIR } from '../../../constants.js'; +import { + PANIC_UP_THRESHOLD, + PANIC_DOWN_THRESHOLD, + HOOK_COOLDOWN_MS, + SEVERITY_MAP, + PANIC_SESSION_EXPIRY_MS, +} from './panic-constants.js'; // ============================================================================ // TYPES @@ -47,14 +54,6 @@ export interface PanicCheckOutput { // ============================================================================ const PANIC_STATE_FILE = 'panic-state.json'; -const SESSION_EXPIRY_MS = 30 * 60 * 1000; - 
-// Hysteresis: separate up/down thresholds prevent score thrashing at boundaries -const PANIC_UP_THRESHOLD: Record = { 0: 30, 1: 50, 2: 70, 3: 90 }; -const PANIC_DOWN_THRESHOLD: Record = { 1: 20, 2: 40, 3: 60, 4: 80 }; - -// Cooldowns: sparse injection prevents context saturation and habituation -const HOOK_COOLDOWN_MS: Record = { 0: 0, 1: 120_000, 2: 60_000, 3: 30_000, 4: 0 }; // ============================================================================ // HYSTERESIS @@ -119,7 +118,7 @@ export function readPanicState(directory: string): PanicState { // Session hard reset: zombie state from a previous session must not leak if (parsed.updatedAt) { const age = Date.now() - new Date(parsed.updatedAt).getTime(); - if (age > SESSION_EXPIRY_MS) return defaultPanicState(); + if (age > PANIC_SESSION_EXPIRY_MS) return defaultPanicState(); } return { ...defaultPanicState(), ...parsed, schemaVersion: 1 }; @@ -163,10 +162,6 @@ const DIRECTIVE_MESSAGES: Record = { 4: '[PANIC:CRITICAL] Critical epistemic instability. Call orient() before further modifications.', }; -const SEVERITY_MAP: Record = { - 0: undefined, 1: 'elevated', 2: 'panic', 3: 'scope', 4: 'critical', -}; - /** * Builds the structured output for the panic-check CLI hook consumer. * Always exits 0 — severity encoded in payload, not exit code. From 3207495770dd2ca48591a5d14241ba87a35037d0 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Fri, 22 May 2026 22:13:26 +0200 Subject: [PATCH 17/22] =?UTF-8?q?feat(panic):=20Gryph=20runtime=20observab?= =?UTF-8?q?ility=20=E2=80=94=20background=20polling=20closes=20MCP=20blind?= =?UTF-8?q?=20spot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Promotes Gryph from optional enrichment to first-class behavioral source. Background poll loop (default 15s) updates panic state independently of MCP tool calls: agents working purely via Bash/Edit/Read are now observable. 
Architecture: RuntimeBehaviorProvider (interface) + GryphBehaviorProvider (impl) startGryphPolling: async, single-flight, syncs in-memory tracker to prevent MCP path from overwriting Gryph-elevated scores on next call. New constants: GRYPH_POLL_INTERVAL_MS, GRYPH_POLL_INTERVAL_MIN_MS, GRYPH_RETRY_BURST_DELTA, GRYPH_LARGE_PATCH_*_DELTA, GRYPH_*_THRESHOLD. New env: OPENLORE_GRYPH_POLL_INTERVAL_MS (min 5000ms, default 15000ms). Provenance carries source:'gryph' on all Gryph-originated deltas. Backward-compat: queryGryphSignals/applyGryphDelta preserved for hook path. Fail-open invariant: all Gryph failures resolve to null, zero impact. 19 new tests: single-flight, timeout, null stability, score accumulation, stale gating, provenance attribution, tracker sync, exception safety. --- src/cli/commands/mcp.ts | 9 + .../mcp-handlers/gryph-bridge.test.ts | 370 ++++++++++++++++++ .../services/mcp-handlers/gryph-bridge.ts | 343 +++++++++++++--- .../services/mcp-handlers/panic-constants.ts | 32 ++ 4 files changed, 695 insertions(+), 59 deletions(-) create mode 100644 src/core/services/mcp-handlers/gryph-bridge.test.ts diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts index 5485ee85..8c39c156 100644 --- a/src/cli/commands/mcp.ts +++ b/src/cli/commands/mcp.ts @@ -35,6 +35,7 @@ import { createTracker, updateTracker, updatePanic, getFreshnessSignal, trackerT import type { EpistemicTracker } from '../../core/services/mcp-handlers/epistemic-lease.js'; import type { PanicResponseMode } from '../../types/index.js'; import { writePanicState, getPanicSignalText } from '../../core/services/mcp-handlers/panic-response.js'; +import { startGryphPolling } from '../../core/services/mcp-handlers/gryph-bridge.js'; import { emit } from '../../core/services/telemetry.js'; import { readOpenLoreConfig } from '../../core/services/config-manager.js'; import { DEFAULT_DRIFT_MAX_FILES } from '../../constants.js'; @@ -1310,6 +1311,7 @@ async function startMcpServer(options: McpServerOptions 
= {}): Promise { let tracker: EpistemicTracker | undefined; let trackerDir = ''; let panicPolicy: PanicResponseMode = 'off'; + let stopGryphPolling: (() => void) | null = null; // --watch-auto: start the watcher on the first tool call that carries a directory let autoWatcher: import('../../core/services/mcp-watcher.js').McpWatcher | undefined; @@ -1356,12 +1358,19 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { // Init (or re-init when project directory changes between calls) if (directory && (!tracker || directory !== trackerDir)) { + stopGryphPolling?.(); + stopGryphPolling = null; tracker = createTracker(directory); trackerDir = directory; const cfg = await readOpenLoreConfig(directory); panicPolicy = cfg?.panicResponse?.mode ?? 'off'; if (panicPolicy !== 'off') { emit(directory, 'panic-response', { event: 'panic_mode_active', mode: panicPolicy }); + const _tracker = tracker; + stopGryphPolling = startGryphPolling({ + directory, + getTracker: () => _tracker, + }); } } // Update epistemic state before dispatch (orient resets tracker internally). diff --git a/src/core/services/mcp-handlers/gryph-bridge.test.ts b/src/core/services/mcp-handlers/gryph-bridge.test.ts new file mode 100644 index 00000000..5ce68d56 --- /dev/null +++ b/src/core/services/mcp-handlers/gryph-bridge.test.ts @@ -0,0 +1,370 @@ +/** + * Tests for gryph-bridge.ts — RuntimeBehaviorProvider, GryphBehaviorProvider, + * startGryphPolling lifecycle (single-flight, async isolation, panic state updates, + * tracker sync, provenance attribution, telemetry). 
+ */ + +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { mkdtemp, mkdir } from 'node:fs/promises'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { OPENLORE_DIR } from '../../../constants.js'; +import { + GryphBehaviorProvider, + startGryphPolling, + applyGryphDelta, + queryGryphSignals, +} from './gryph-bridge.js'; +import type { RuntimeBehaviorProvider, RuntimeBehaviorSnapshot } from './gryph-bridge.js'; +import { readPanicState, writePanicState, defaultPanicState } from './panic-response.js'; +import type { EpistemicTracker } from './epistemic-lease.js'; +import { + GRYPH_RETRY_BURST_DELTA, + GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA, + GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA, + GRYPH_POLL_INTERVAL_MS, +} from './panic-constants.js'; + +// ============================================================================ +// Helpers +// ============================================================================ + +function makeTracker(overrides: Partial = {}): EpistemicTracker { + return { + lastOrientAt: new Date(), + graphVersionAtOrient: 'abc', + cogLoad: 0, + freshnessState: 'fresh', + staleDepth: 0, + recentModules: [], + density: 0, + oscillation: 0, + localityConfidence: 1, + panicScore: 0, + panicLevel: 0, + recentOrientCount: 0, + lastOrientResetAt: 0, + interventionCountSinceStable: 0, + lastPanicUpdateAt: 0, + panicTriggers: [], + panicRecoverySuppressionUntil: 0, + ...overrides, + } as EpistemicTracker; +} + +class FixedProvider implements RuntimeBehaviorProvider { + constructor(private snapshot: RuntimeBehaviorSnapshot | null) {} + async collect(_since: string): Promise { + return this.snapshot; + } +} + +class CountingProvider implements RuntimeBehaviorProvider { + calls = 0; + snapshots: Array = []; + constructor(private responses: Array = []) {} + async collect(_since: string): Promise { + this.calls++; + const snap = this.responses.shift() ?? 
null; + this.snapshots.push(snap); + return snap; + } +} + +class SlowProvider implements RuntimeBehaviorProvider { + running = 0; + maxConcurrent = 0; + async collect(_since: string): Promise { + this.running++; + this.maxConcurrent = Math.max(this.maxConcurrent, this.running); + await new Promise(r => setTimeout(r, 50)); + this.running--; + return null; + } +} + +// ============================================================================ +// applyGryphDelta — backward compat path +// ============================================================================ + +describe('applyGryphDelta', () => { + it('retry burst adds delta', () => { + const triggers: string[] = []; + const score = applyGryphDelta(0, { commandEntropy: 0.1, repetitiveRetryBurst: true, largePatchWhileStale: false, largePatchLoc: 0 }, false, triggers); + expect(score).toBe(GRYPH_RETRY_BURST_DELTA); + expect(triggers).toContain('repetitive_retry_burst'); + }); + + it('large patch while stale — low entropy applies heavy delta', () => { + const triggers: string[] = []; + const score = applyGryphDelta(0, { commandEntropy: 0.1, repetitiveRetryBurst: false, largePatchWhileStale: true, largePatchLoc: 600 }, true, triggers); + expect(score).toBe(GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA); + expect(triggers).toContain('large_patch_stale'); + }); + + it('large patch while stale — high entropy attenuated', () => { + const triggers: string[] = []; + const score = applyGryphDelta(0, { commandEntropy: 0.8, repetitiveRetryBurst: false, largePatchWhileStale: true, largePatchLoc: 600 }, true, triggers); + expect(score).toBe(GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA); + expect(triggers).toContain('large_patch_attenuated'); + }); + + it('large patch NOT stale — no delta', () => { + const triggers: string[] = []; + const score = applyGryphDelta(0, { commandEntropy: 0.1, repetitiveRetryBurst: false, largePatchWhileStale: true, largePatchLoc: 600 }, false, triggers); + expect(score).toBe(0); + }); + + it('clamps at 100', () 
=> { + const score = applyGryphDelta(95, { commandEntropy: 0.1, repetitiveRetryBurst: true, largePatchWhileStale: true, largePatchLoc: 600 }, true, []); + expect(score).toBe(100); + }); +}); + +// ============================================================================ +// GryphBehaviorProvider — mocked child_process +// ============================================================================ + +describe('GryphBehaviorProvider', () => { + it('returns null when gryph not available', async () => { + vi.mock('node:child_process', () => ({ + spawnSync: vi.fn(() => ({ status: 1, stdout: null })), + spawn: vi.fn(), + })); + const provider = new GryphBehaviorProvider(); + const result = await provider.collect(new Date().toISOString()); + // may return null (gryph unavailable) or a snapshot — just must not throw + expect(result === null || typeof result === 'object').toBe(true); + vi.restoreAllMocks(); + }); +}); + +// ============================================================================ +// queryGryphSignals — backward compat +// ============================================================================ + +describe('queryGryphSignals', () => { + it('returns null when gryph unavailable', () => { + // No mock needed — gryph is not installed in test env + const result = queryGryphSignals(new Date().toISOString()); + expect(result).toBeNull(); + }); +}); + +// ============================================================================ +// startGryphPolling — lifecycle +// ============================================================================ + +describe('startGryphPolling', () => { + let dir: string; + + beforeEach(async () => { + vi.useFakeTimers(); + dir = await mkdtemp(join(tmpdir(), 'gryph-test-')); + await mkdir(join(dir, OPENLORE_DIR, 'telemetry'), { recursive: true }); + }); + + afterEach(() => { + vi.useRealTimers(); + vi.restoreAllMocks(); + }); + + it('calls provider after first interval', async () => { + const provider = new 
CountingProvider([null]); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + expect(provider.calls).toBe(0); + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS); + expect(provider.calls).toBe(1); + + stop(); + }); + + it('stops polling after cleanup call', async () => { + const provider = new CountingProvider([null, null, null]); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS); + stop(); + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS * 3); + expect(provider.calls).toBe(1); + }); + + it('single-flight: overlapping poll skipped', async () => { + const slow = new SlowProvider(); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider: slow }); + + // Fire two intervals while first poll is still running (50ms delay) + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS); + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS); + await vi.advanceTimersByTimeAsync(100); // let slow poll finish + + expect(slow.maxConcurrent).toBe(1); + stop(); + }); + + it('null snapshot — no panic state written', async () => { + const provider = new FixedProvider(null); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + // panic-state.json should not exist (no prior state) + const state = readPanicState(dir); + expect(state.panicScore).toBe(0); + }); + + it('snapshot with no actionable signals — no state update', async () => { + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.8, + repetitiveRetryBurst: false, + shellActivity: true, + }; + const provider = new FixedProvider(snapshot); + const 
tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + const state = readPanicState(dir); + expect(state.panicScore).toBe(0); + }); + + it('retry burst signal — updates panic state and syncs tracker', async () => { + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.1, + repetitiveRetryBurst: true, + shellActivity: true, + }; + const provider = new FixedProvider(snapshot); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + const state = readPanicState(dir); + expect(state.panicScore).toBe(GRYPH_RETRY_BURST_DELTA); + expect(tracker.panicScore).toBe(GRYPH_RETRY_BURST_DELTA); + }); + + it('large patch while stale — updates panic state', async () => { + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.1, + repetitiveRetryBurst: false, + largePatchWhileStale: { loc: 800, entropy: 0.1 }, + }; + const provider = new FixedProvider(snapshot); + const tracker = makeTracker({ staleDepth: 2 }); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + const state = readPanicState(dir); + expect(state.panicScore).toBe(GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA); + }); + + it('large patch NOT stale — no delta', async () => { + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.1, + repetitiveRetryBurst: false, + largePatchWhileStale: { loc: 800, entropy: 0.1 }, + }; + const provider = new FixedProvider(snapshot); + const tracker = makeTracker({ staleDepth: 0 }); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await 
vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + const state = readPanicState(dir); + expect(state.panicScore).toBe(0); + }); + + it('provenance carries source:gryph', async () => { + const emitted: unknown[] = []; + vi.spyOn(await import('../telemetry.js'), 'emit').mockImplementation( + (_dir, _domain, payload) => { emitted.push(payload); }, + ); + + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.1, + repetitiveRetryBurst: true, + }; + const provider = new FixedProvider(snapshot); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + const delta = emitted.find( + (e): e is Record => + typeof e === 'object' && e !== null && (e as Record)['event'] === 'panic_score_delta', + ); + expect(delta).toBeDefined(); + expect(delta?.['source']).toBe('gryph'); + const provenance = delta?.['provenance'] as Array>; + expect(provenance?.[0]?.['evidence']).toMatchObject({ source: 'gryph' }); + }); + + it('provider exception — fail-open, no throw', async () => { + const broken: RuntimeBehaviorProvider = { + async collect() { throw new Error('network error'); }, + }; + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider: broken }); + + await expect(vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100)).resolves.not.toThrow(); + stop(); + + expect(tracker.panicScore).toBe(0); + }); + + it('null tracker — still writes panic state', async () => { + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.1, + repetitiveRetryBurst: true, + }; + const provider = new FixedProvider(snapshot); + const stop = startGryphPolling({ directory: dir, getTracker: () => null, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + stop(); + + const state = 
readPanicState(dir); + expect(state.panicScore).toBe(GRYPH_RETRY_BURST_DELTA); + }); + + it('accumulates score across polls', async () => { + const snapshot: RuntimeBehaviorSnapshot = { + timestamp: Date.now(), + commandEntropy: 0.1, + repetitiveRetryBurst: true, + }; + const provider = new CountingProvider([snapshot, snapshot]); + const tracker = makeTracker(); + const stop = startGryphPolling({ directory: dir, getTracker: () => tracker, provider }); + + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS + 100); + await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS); + stop(); + + const state = readPanicState(dir); + expect(state.panicScore).toBe(GRYPH_RETRY_BURST_DELTA * 2); + expect(tracker.panicScore).toBe(GRYPH_RETRY_BURST_DELTA * 2); + }); +}); diff --git a/src/core/services/mcp-handlers/gryph-bridge.ts b/src/core/services/mcp-handlers/gryph-bridge.ts index ec28e7a3..18dd527f 100644 --- a/src/core/services/mcp-handlers/gryph-bridge.ts +++ b/src/core/services/mcp-handlers/gryph-bridge.ts @@ -1,31 +1,66 @@ /** - * Gryph bridge — optional integration with safedep/gryph observability tool. + * Gryph bridge — runtime behavioral observability provider. * - * Gryph records shell exec and file-write events to a local SQLite store, - * queryable via its CLI. Enriches panic score with signals openlore cannot - * observe directly (commandEntropy, retry bursts, large patches while stale). + * Promotes Gryph from optional score enrichment to first-class behavioral source. + * Runs a background poll loop that updates panic state independently of MCP tool + * calls, closing the blind spot where agents work purely via Bash/Edit/Read. 
* - * MUST degrade gracefully to zero-impact absence semantics: - * - gryph binary absent → returns null, no error, no log noise - * - query timeout (200ms) → returns null - * - unexpected output format → returns null - * - any exception → returns null + * Architecture: + * RuntimeBehaviorProvider (interface) + * └── GryphBehaviorProvider (impl: gryph query CLI) + * └── startGryphPolling (background loop → panic state) + * + * All failures degrade to zero-impact null semantics: + * - gryph binary absent → null + * - timeout → null + * - malformed output → null + * - any exception → null + * + * The poll loop MUST NOT block MCP execution, delay tool responses, or overlap. */ -import { spawnSync } from 'node:child_process'; +import { spawnSync, spawn } from 'node:child_process'; +import { emit } from '../telemetry.js'; +import { readPanicState, writePanicState, applyPanicHysteresis } from './panic-response.js'; +import type { PanicState, PanicLevel } from './panic-response.js'; +import type { EpistemicTracker } from './epistemic-lease.js'; +import { + PANIC_SCORE_MAX, + GRYPH_RETRY_BURST_DELTA, + GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA, + GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA, + GRYPH_LARGE_PATCH_LOC_THRESHOLD, + GRYPH_ENTROPY_LOW_THRESHOLD, + GRYPH_ENTROPY_HIGH_THRESHOLD, + GRYPH_POLL_INTERVAL_MS, + GRYPH_POLL_INTERVAL_MIN_MS, +} from './panic-constants.js'; // ============================================================================ // TYPES // ============================================================================ +/** Behavioral snapshot from a runtime observability source. */ +export interface RuntimeBehaviorSnapshot { + timestamp: number; + commandEntropy?: number; + repetitiveRetryBurst?: boolean; + failingCommandRate?: number; + largePatchWhileStale?: { loc: number; entropy: number }; + commandCount?: number; + shellActivity?: boolean; +} + +/** Abstraction for runtime behavioral data sources. 
*/ +export interface RuntimeBehaviorProvider { + collect(since: string): Promise; +} + +/** Kept for backward compat with panic-check.ts enrichment path. */ export interface GryphSignals { - /** [0,1] diversity of recent command invocations. Low = retry loop. */ commandEntropy: number; - /** Low-entropy + repeated failing commands = destabilized shell activity. */ repetitiveRetryBurst: boolean; - /** Any write event > 500 LOC detected in the time window. */ largePatchWhileStale: boolean; - /** LOC count of the largest write event seen, 0 if none. */ largePatchLoc: number; } @@ -33,7 +68,7 @@ interface GryphExecEvent { timestamp?: string; action?: string; command?: string; - cmd?: string; // alternate key some versions use + cmd?: string; exit_code?: number; exitCode?: number; } @@ -48,29 +83,28 @@ interface GryphWriteEvent { additions?: number; } +interface SnapshotDeltaResult { + newScore: number; + newLevel: PanicLevel; + provenance: Array<{ name: string; delta: number; evidence: Record }>; +} + // ============================================================================ // CONSTANTS // ============================================================================ -// OPENLORE_GRYPH_TIMEOUT_MS overrides the default 150ms per-query budget. -const GRYPH_TIMEOUT_MS = Math.max(50, Number(process.env['OPENLORE_GRYPH_TIMEOUT_MS'] ?? 150)); -const GRYPH_DETECT_TIMEOUT_MS = 50; // PATH check (not user-configurable — boot critical) -const LARGE_PATCH_LOC_THRESHOLD = 500; -const ENTROPY_LOW_THRESHOLD = 0.30; // below = low-diversity / retry-loop +const GRYPH_TIMEOUT_MS = Math.max(50, Number(process.env['OPENLORE_GRYPH_TIMEOUT_MS'] ?? 150)); +const GRYPH_DETECT_TIMEOUT_MS = 50; // ============================================================================ // ENTROPY COMPUTATION // ============================================================================ -/** - * Normalised Shannon entropy of a command sequence. 
- * Returns 1.0 (high entropy / fail-open) when sequence is empty. - */ function computeCommandEntropy(commands: string[]): number { if (commands.length === 0) return 1; const counts = new Map(); for (const cmd of commands) { - const key = cmd.trim().split(/\s+/)[0] ?? cmd; // normalise to base command + const key = cmd.trim().split(/\s+/)[0] ?? cmd; counts.set(key, (counts.get(key) ?? 0) + 1); } const n = commands.length; @@ -103,15 +137,12 @@ function isGryphAvailable(): boolean { // QUERY HELPERS // ============================================================================ -function queryGryph(action: 'exec' | 'write', since: string): unknown[] { +/** Synchronous query — used by the backward-compat panic-check enrichment path. */ +function queryGryphSync(action: 'exec' | 'write', since: string): unknown[] { const result = spawnSync( 'gryph', ['query', '--format', 'json', '--action', action, '--since', since], - { - timeout: GRYPH_TIMEOUT_MS, - stdio: ['ignore', 'pipe', 'ignore'], - encoding: 'utf-8', - }, + { timeout: GRYPH_TIMEOUT_MS, stdio: ['ignore', 'pipe', 'ignore'], encoding: 'utf-8' }, ); if (result.status !== 0 || !result.stdout) return []; try { @@ -122,51 +153,246 @@ function queryGryph(action: 'exec' | 'write', since: string): unknown[] { } } +/** Async query — used by GryphBehaviorProvider polling path (non-blocking). 
*/ +async function queryGryphAsync(action: 'exec' | 'write', since: string): Promise { + return new Promise((resolve) => { + const child = spawn( + 'gryph', + ['query', '--format', 'json', '--action', action, '--since', since], + { stdio: ['ignore', 'pipe', 'ignore'] }, + ); + const timer = setTimeout(() => { child.kill(); resolve([]); }, GRYPH_TIMEOUT_MS); + let output = ''; + child.stdout.on('data', (chunk: Buffer) => { output += chunk.toString(); }); + child.on('close', (code) => { + clearTimeout(timer); + if (code !== 0 || !output) { resolve([]); return; } + try { + const parsed = JSON.parse(output.trim()); + resolve(Array.isArray(parsed) ? parsed : []); + } catch { + resolve([]); + } + }); + child.on('error', () => { clearTimeout(timer); resolve([]); }); + }); +} + +// ============================================================================ +// SNAPSHOT DELTA — applies RuntimeBehaviorSnapshot to a panic state +// ============================================================================ + +function applySnapshotDelta( + snapshot: RuntimeBehaviorSnapshot, + state: PanicState, + staleDepth: number, +): SnapshotDeltaResult { + let delta = 0; + const provenance: SnapshotDeltaResult['provenance'] = []; + const isStale = staleDepth >= 2; + + if (snapshot.repetitiveRetryBurst) { + delta += GRYPH_RETRY_BURST_DELTA; + provenance.push({ + name: 'gryph_retry_burst', + delta: GRYPH_RETRY_BURST_DELTA, + evidence: { source: 'gryph', entropy: snapshot.commandEntropy ?? null }, + }); + } + + if (snapshot.largePatchWhileStale && isStale) { + const { loc, entropy } = snapshot.largePatchWhileStale; + const attenuated = entropy > GRYPH_ENTROPY_HIGH_THRESHOLD; + const d = attenuated ? 
GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA : GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA; + delta += d; + provenance.push({ + name: 'large_patch_while_stale', + delta: d, + evidence: { source: 'gryph', loc, entropy }, + }); + } + + if (delta === 0) { + return { newScore: state.panicScore, newLevel: state.panicLevel, provenance }; + } + + const newScore = Math.min(PANIC_SCORE_MAX, Math.max(0, state.panicScore + delta)); + const newLevel = applyPanicHysteresis(state.panicLevel, newScore, staleDepth); + return { newScore, newLevel, provenance }; +} + // ============================================================================ -// PUBLIC API +// GryphBehaviorProvider — RuntimeBehaviorProvider implementation // ============================================================================ +export class GryphBehaviorProvider implements RuntimeBehaviorProvider { + async collect(since: string): Promise { + try { + if (!isGryphAvailable()) return null; + + const [execEvents, writeEvents] = await Promise.all([ + queryGryphAsync('exec', since) as Promise, + queryGryphAsync('write', since) as Promise, + ]); + + const commands = (execEvents as GryphExecEvent[]) + .map(e => e.command ?? e.cmd ?? '') + .filter(Boolean); + const commandEntropy = computeCommandEntropy(commands); + + const failingCount = (execEvents as GryphExecEvent[]) + .filter(e => (e.exit_code ?? e.exitCode ?? 0) !== 0).length; + const failingCommandRate = execEvents.length > 0 ? failingCount / execEvents.length : 0; + const repetitiveRetryBurst = commandEntropy < GRYPH_ENTROPY_LOW_THRESHOLD && failingCount > 0; + + const locs = (writeEvents as GryphWriteEvent[]).map(e => e.lines ?? e.loc ?? e.additions ?? 0); + const maxLoc = locs.length > 0 ? Math.max(...locs) : 0; + + return { + timestamp: Date.now(), + commandEntropy, + repetitiveRetryBurst, + failingCommandRate, + largePatchWhileStale: maxLoc > GRYPH_LARGE_PATCH_LOC_THRESHOLD + ? 
{ loc: maxLoc, entropy: commandEntropy } + : undefined, + commandCount: commands.length, + shellActivity: execEvents.length > 0, + }; + } catch { + return null; + } + } +} + +// ============================================================================ +// POLLING LIFECYCLE +// ============================================================================ + +export interface GryphPollingOptions { + directory: string; + /** Returns current stale depth from in-memory tracker. */ + getTracker: () => EpistemicTracker | null; + /** Optional provider override (for testing). */ + provider?: RuntimeBehaviorProvider; +} + /** - * Query Gryph for behavioral signals since `since` (ISO 8601). - * Returns null when Gryph is absent or any error occurs — callers must - * treat null as "no additional signals" (fail-open, zero-impact). + * Start background Gryph polling. Returns a cleanup function (call on shutdown). + * + * Invariants: + * - Never overlaps: single-flight protection skips polls while previous is running + * - Never blocks: async spawn, isolated from MCP execution path + * - Never throws: all errors caught, fail-open + * - Syncs tracker: panicScore/panicLevel updated in-memory after file write so + * the MCP path doesn't overwrite Gryph-elevated state on the next tool call + */ +export function startGryphPolling(opts: GryphPollingOptions): () => void { + const { directory, getTracker, provider = new GryphBehaviorProvider() } = opts; + + const intervalMs = Math.max( + GRYPH_POLL_INTERVAL_MIN_MS, + Number(process.env['OPENLORE_GRYPH_POLL_INTERVAL_MS'] ?? 
GRYPH_POLL_INTERVAL_MS), + ); + + let isPolling = false; + let lastPollAt = new Date(Date.now() - intervalMs).toISOString(); + + const poll = async (): Promise => { + if (isPolling) return; + isPolling = true; + try { + const since = lastPollAt; + lastPollAt = new Date().toISOString(); + + const snapshot = await provider.collect(since); + + emit(directory, 'panic', { + event: 'gryph_poll', + success: snapshot !== null, + shell_activity: snapshot?.shellActivity ?? false, + }); + + if (!snapshot) return; + + // No actionable signals — skip state update + if (!snapshot.repetitiveRetryBurst && !snapshot.largePatchWhileStale) return; + + const state = readPanicState(directory); + const tracker = getTracker(); + const staleDepth = tracker?.staleDepth ?? 0; + + const { newScore, newLevel, provenance } = applySnapshotDelta(snapshot, state, staleDepth); + if (newScore === state.panicScore && newLevel === state.panicLevel) return; + + const updatedState: PanicState = { + ...state, + panicScore: newScore, + panicLevel: newLevel, + updatedAt: new Date().toISOString(), + triggers: [...(state.triggers ?? 
[]), ...provenance.map(p => p.name)], + }; + writePanicState(directory, updatedState); + + // Sync in-memory tracker so MCP path doesn't overwrite with stale score + if (tracker) { + tracker.panicScore = newScore; + tracker.panicLevel = newLevel as PanicLevel; + } + + emit(directory, 'panic', { + event: 'panic_score_delta', + source: 'gryph', + delta: newScore - state.panicScore, + from_score: state.panicScore, + to_score: newScore, + from_level: state.panicLevel, + to_level: newLevel, + provenance, + }); + } catch { + // fail-open: no error propagates + } finally { + isPolling = false; + } + }; + + const handle = setInterval(() => { void poll(); }, intervalMs); + return () => clearInterval(handle); +} + +// ============================================================================ +// BACKWARD COMPAT — panic-check.ts enrichment path (sync, pre-existing) +// ============================================================================ + +/** + * Synchronous Gryph query for the panic-check hook enrichment path. + * Returns null when Gryph is absent or any error occurs. */ export function queryGryphSignals(since: string): GryphSignals | null { try { if (!isGryphAvailable()) return null; - const execEvents = queryGryph('exec', since) as GryphExecEvent[]; - const writeEvents = queryGryph('write', since) as GryphWriteEvent[]; + const execEvents = queryGryphSync('exec', since) as GryphExecEvent[]; + const writeEvents = queryGryphSync('write', since) as GryphWriteEvent[]; - // commandEntropy from exec event command strings - const commands = execEvents - .map(e => e.command ?? e.cmd ?? '') - .filter(Boolean); + const commands = execEvents.map(e => e.command ?? e.cmd ?? '').filter(Boolean); const commandEntropy = computeCommandEntropy(commands); - - // Repetitive retry burst: low entropy AND any failing command in window const hasFailures = execEvents.some(e => (e.exit_code ?? e.exitCode ?? 
0) !== 0); - const repetitiveRetryBurst = commandEntropy < ENTROPY_LOW_THRESHOLD && hasFailures; + const repetitiveRetryBurst = commandEntropy < GRYPH_ENTROPY_LOW_THRESHOLD && hasFailures; - // Large patch: find max LOC write event const locs = writeEvents.map(e => e.lines ?? e.loc ?? e.additions ?? 0); const largePatchLoc = locs.length > 0 ? Math.max(...locs) : 0; - const largePatchWhileStale = largePatchLoc > LARGE_PATCH_LOC_THRESHOLD; + const largePatchWhileStale = largePatchLoc > GRYPH_LARGE_PATCH_LOC_THRESHOLD; return { commandEntropy, repetitiveRetryBurst, largePatchWhileStale, largePatchLoc }; } catch { - return null; // always fail open + return null; } } /** - * Apply Gryph-derived score deltas to a base panic score. - * Returns the adjusted score (clamped [0,100]). - * - * Weights from spec: - * repetitive retry burst: +15 - * large patch (low entropy): +30 - * large patch (high entropy / legitimate refactor): +10 + * Apply Gryph-derived score deltas (backward compat — panic-check enrichment). */ export function applyGryphDelta( baseScore: number, @@ -177,16 +403,15 @@ export function applyGryphDelta( let delta = 0; if (signals.repetitiveRetryBurst) { - delta += 15; + delta += GRYPH_RETRY_BURST_DELTA; triggers.push('repetitive_retry_burst'); } if (signals.largePatchWhileStale && isStale) { - // Large patch attenuation: high entropy = deliberate refactor → +10, not +30 - const attenuated = signals.commandEntropy > 0.60; - delta += attenuated ? 10 : 30; + const attenuated = signals.commandEntropy > GRYPH_ENTROPY_HIGH_THRESHOLD; + delta += attenuated ? GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA : GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA; triggers.push(attenuated ? 
'large_patch_attenuated' : 'large_patch_stale'); } - return Math.min(100, Math.max(0, baseScore + delta)); + return Math.min(PANIC_SCORE_MAX, Math.max(0, baseScore + delta)); } diff --git a/src/core/services/mcp-handlers/panic-constants.ts b/src/core/services/mcp-handlers/panic-constants.ts index 7c25d688..84f57653 100644 --- a/src/core/services/mcp-handlers/panic-constants.ts +++ b/src/core/services/mcp-handlers/panic-constants.ts @@ -82,6 +82,38 @@ export const HOOK_COOLDOWN_MS: Record = { 4: 0, }; +// ============================================================================ +// GRYPH SIGNAL WEIGHTS +// ============================================================================ + +/** Repetitive retry burst (low entropy + failing commands). */ +export const GRYPH_RETRY_BURST_DELTA = 15; + +/** Large patch while stale, low command entropy (non-deliberate). */ +export const GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA = 30; + +/** Large patch while stale, high command entropy (deliberate refactor — attenuated). */ +export const GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA = 10; + +/** LOC threshold for "large patch" classification. */ +export const GRYPH_LARGE_PATCH_LOC_THRESHOLD = 500; + +/** Command entropy below this = low-diversity / retry loop. */ +export const GRYPH_ENTROPY_LOW_THRESHOLD = 0.30; + +/** Command entropy above this = deliberate exploratory work (attenuation gate). */ +export const GRYPH_ENTROPY_HIGH_THRESHOLD = 0.60; + +// ============================================================================ +// GRYPH POLLING +// ============================================================================ + +/** Default poll interval for background Gryph behavioral ingestion. */ +export const GRYPH_POLL_INTERVAL_MS = 15_000; + +/** Minimum allowed poll interval (env override floor). 
*/ +export const GRYPH_POLL_INTERVAL_MIN_MS = 5_000; + // ============================================================================ // SEVERITY MAP // ============================================================================ From 00780a18c18d484dd3d87a378bb9090bcdaca449 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Sat, 23 May 2026 12:19:04 +0200 Subject: [PATCH 18/22] feat(panic): gryph-watch daemon + CAS writes + while-loop poller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three architectural fixes from post-Gryph review: Gryph polling decoupled from MCP session (option B): - New `openlore gryph-watch` command: standalone observer process, lifetime independent of MCP server. Singleton via PID file, auto-detects project dir, exits on SIGTERM/SIGINT/stdin-close (parent death). - Installed via `openlore setup --hooks claude` as a UserPromptSubmit hook — starts once per session from the first user prompt, not the first tool call. - Closes the blind spot permanently: Gryph signals flow even when no openlore MCP tools are called during the entire session. - Removed Gryph polling from mcp.ts (was MCP-session-scoped, too late to start). CAS writes prevent multi-writer score regression: - `revision: number` added to PanicState — monotonically bumped on every write. - `casWritePanicState()`: synchronous read-check-write (atomic within Node.js event loop, no await between ops). Returns false on revision mismatch. - Gryph poll uses CAS with one retry: re-reads fresh state and recomputes delta if MCP wrote between poll read and poll write. - MCP path uses `writePanicState()` (returns new revision) and syncs `tracker.panicRevision` — prevents MCP from writing stale revision on next call. - `panicRevision` added to EpistemicTracker and `trackerToPanicState()`. 
setInterval replaced with while loop: - `while (!stopped) { await sleep(intervalMs); if (!stopped) await poll(); }` - Eliminates timer drift, stop lifecycle races, and orphan timer edge cases. - Sequential await guarantees no overlap (isPolling guard kept as defense-in-depth). Workspace registry in startGryphPolling: - Module-level `_pollerRegistry: Map<string, () => void>` keyed by directory. - `startGryphPolling` stops any existing poller for the same directory before starting a new one — enforces one-per-workspace at the call site. --- src/cli/commands/gryph-watch.ts | 83 ++++++++++++++ src/cli/commands/mcp.ts | 13 +-- src/cli/commands/setup.ts | 65 +++++++++++ src/cli/index.ts | 2 + .../services/mcp-handlers/epistemic-lease.ts | 4 + .../mcp-handlers/gryph-bridge.test.ts | 1 + .../services/mcp-handlers/gryph-bridge.ts | 102 ++++++++++++------ .../mcp-handlers/panic-response.test.ts | 1 + .../services/mcp-handlers/panic-response.ts | 43 +++++++- 9 files changed, 267 insertions(+), 47 deletions(-) create mode 100644 src/cli/commands/gryph-watch.ts diff --git a/src/cli/commands/gryph-watch.ts b/src/cli/commands/gryph-watch.ts new file mode 100644 index 00000000..a8ea65b3 --- /dev/null +++ b/src/cli/commands/gryph-watch.ts @@ -0,0 +1,83 @@ +/** + * openlore gryph-watch + * + * Standalone Gryph behavioral observer. Runs as an independent background + * process — lifetime decoupled from the MCP server session. Polls Gryph every + * interval and writes behavioral signals to panic-state.json via CAS writes. + * + * Why a separate process: MCP-path Gryph polling only starts after the first + * openlore tool call. Agents working exclusively via Bash/Edit/Read never + * trigger that path. gryph-watch closes this gap by running continuously from + * session start. 
+ * + * Signals provided (standalone, without MCP tracker context): + * repetitiveRetryBurst — low entropy + failing commands (no stale context needed) + * + * Signals requiring MCP tracker (not available here): + * largePatchWhileStale — staleDepth unknown without EpistemicLease session + * + * Install via: openlore setup --hooks claude + * Which installs a UserPromptSubmit hook: openlore gryph-watch & + */ + +import { Command } from 'commander'; +import { existsSync, readFileSync, writeFileSync, unlinkSync } from 'node:fs'; +import { join, dirname } from 'node:path'; +import { OPENLORE_DIR } from '../../constants.js'; +import { readOpenLoreConfig } from '../../core/services/config-manager.js'; +import { startGryphPolling } from '../../core/services/mcp-handlers/gryph-bridge.js'; + +const PID_FILE = 'gryph-watch.pid'; + +function findProjectDirectory(startDir: string): string | null { + let dir = startDir; + for (;;) { + if (existsSync(join(dir, OPENLORE_DIR, 'config.json'))) return dir; + const parent = dirname(dir); + if (parent === dir) return null; + dir = parent; + } +} + +function isProcessAlive(pid: number): boolean { + try { process.kill(pid, 0); return true; } + catch { return false; } +} + +export const gryphWatchCommand = new Command('gryph-watch') + .description('Background Gryph behavioral observer (install via: openlore setup --hooks)') + .argument('[directory]', 'Project directory — auto-detected from cwd if omitted') + .action(async (directoryArg?: string) => { + const directory = directoryArg + ?? findProjectDirectory(process.cwd()) + ?? process.cwd(); + + const cfg = await readOpenLoreConfig(directory); + const mode = cfg?.panicResponse?.mode ?? 
'off'; + if (mode === 'off') process.exit(0); + + // Singleton enforcement: one watcher per directory + const pidPath = join(directory, OPENLORE_DIR, PID_FILE); + if (existsSync(pidPath)) { + try { + const existing = parseInt(readFileSync(pidPath, 'utf-8').trim(), 10); + if (!isNaN(existing) && isProcessAlive(existing)) process.exit(0); + } catch { /* stale PID file — proceed */ } + } + try { writeFileSync(pidPath, String(process.pid), 'utf-8'); } catch { /* non-fatal */ } + + const cleanup = (): void => { + try { unlinkSync(pidPath); } catch { /* ignore */ } + process.exit(0); + }; + process.on('SIGTERM', cleanup); + process.on('SIGINT', cleanup); + // Detect parent process death via stdin EOF (pipe from shell/agent closes) + process.stdin.resume(); + process.stdin.on('close', cleanup); + + // startGryphPolling drives a while loop internally — pending setTimeout keeps + // the process alive. getTracker: () => null is intentional: staleDepth is + // unknown without an active MCP session; largePatchWhileStale is MCP-path-only. 
+ startGryphPolling({ directory, getTracker: () => null }); + }); diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts index 8c39c156..491ba254 100644 --- a/src/cli/commands/mcp.ts +++ b/src/cli/commands/mcp.ts @@ -35,7 +35,6 @@ import { createTracker, updateTracker, updatePanic, getFreshnessSignal, trackerT import type { EpistemicTracker } from '../../core/services/mcp-handlers/epistemic-lease.js'; import type { PanicResponseMode } from '../../types/index.js'; import { writePanicState, getPanicSignalText } from '../../core/services/mcp-handlers/panic-response.js'; -import { startGryphPolling } from '../../core/services/mcp-handlers/gryph-bridge.js'; import { emit } from '../../core/services/telemetry.js'; import { readOpenLoreConfig } from '../../core/services/config-manager.js'; import { DEFAULT_DRIFT_MAX_FILES } from '../../constants.js'; @@ -1311,7 +1310,6 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { let tracker: EpistemicTracker | undefined; let trackerDir = ''; let panicPolicy: PanicResponseMode = 'off'; - let stopGryphPolling: (() => void) | null = null; // --watch-auto: start the watcher on the first tool call that carries a directory let autoWatcher: import('../../core/services/mcp-watcher.js').McpWatcher | undefined; @@ -1358,19 +1356,12 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { // Init (or re-init when project directory changes between calls) if (directory && (!tracker || directory !== trackerDir)) { - stopGryphPolling?.(); - stopGryphPolling = null; tracker = createTracker(directory); trackerDir = directory; const cfg = await readOpenLoreConfig(directory); panicPolicy = cfg?.panicResponse?.mode ?? 
'off'; if (panicPolicy !== 'off') { emit(directory, 'panic-response', { event: 'panic_mode_active', mode: panicPolicy }); - const _tracker = tracker; - stopGryphPolling = startGryphPolling({ - directory, - getTracker: () => _tracker, - }); } } // Update epistemic state before dispatch (orient resets tracker internally). @@ -1388,7 +1379,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { directory, tool: name, }); - writePanicState(directory, trackerToPanicState(tracker, agentName)); + tracker.panicRevision = writePanicState(directory, trackerToPanicState(tracker, agentName)); } } @@ -1598,7 +1589,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { if (panicText) { content.push({ type: 'text', text: panicText }); tracker.interventionCountSinceStable++; - writePanicState(directory, trackerToPanicState(tracker, agentName)); + tracker.panicRevision = writePanicState(directory, trackerToPanicState(tracker, agentName)); emit(directory, 'panic', { event: 'panic_signal_injected', panic_level: tracker.panicLevel, diff --git a/src/cli/commands/setup.ts b/src/cli/commands/setup.ts index 08429ae2..bca20796 100644 --- a/src/cli/commands/setup.ts +++ b/src/cli/commands/setup.ts @@ -37,6 +37,7 @@ interface ClaudeHookSettings { hooks?: { PreToolUse?: Array<{ _comment?: string; [key: string]: unknown }>; PostToolUse?: Array<{ _comment?: string; [key: string]: unknown }>; + UserPromptSubmit?: Array<{ _comment?: string; [key: string]: unknown }>; [key: string]: unknown; }; [key: string]: unknown; @@ -70,6 +71,40 @@ export async function installPanicCheckHook(rootPath: string, format: string = ' logger.success(`panic-check PreToolUse hook added to .claude/settings.json (format: ${format})`); } +// ============================================================================ +// GRYPH WATCH HOOK +// Installs openlore gryph-watch as a UserPromptSubmit hook — starts the +// background Gryph observer once per session, decoupled from MCP 
tool calls. +// ============================================================================ + +const GRYPH_WATCH_HOOK_MARKER = 'openlore gryph-watch'; + +export async function installGryphWatchHook(rootPath: string): Promise { + const settingsPath = join(rootPath, '.claude', 'settings.json'); + let settings: ClaudeHookSettings = {}; + try { + settings = JSON.parse(await readFile(settingsPath, 'utf-8')) as ClaudeHookSettings; + } catch { /* start fresh */ } + + const hooks = settings.hooks?.UserPromptSubmit ?? []; + if (hooks.some((h) => JSON.stringify(h).includes(GRYPH_WATCH_HOOK_MARKER))) { + logger.success('gryph-watch UserPromptSubmit hook already present in .claude/settings.json'); + return; + } + + const hookEntry = { + _comment: 'openlore: start Gryph behavioral observer (singleton, background)', + type: 'command', + command: 'openlore gryph-watch &', + }; + settings.hooks ??= {}; + settings.hooks.UserPromptSubmit = [...hooks, hookEntry]; + + await mkdir(join(rootPath, '.claude'), { recursive: true }); + await writeFile(settingsPath, JSON.stringify(settings, null, 2) + '\n', 'utf-8'); + logger.success('gryph-watch UserPromptSubmit hook added to .claude/settings.json'); +} + // ============================================================================ // TYPES // ============================================================================ @@ -338,6 +373,35 @@ export const setupCommand = new Command('setup') const projectRoot = options.dir; const allTools: ToolName[] = ['vibe', 'cline', 'gsd', 'bmad', 'claude', 'opencode', 'omoa']; + // If only flag options (no tool install needed), run them and exit early + if (!options.tools && (options.hooks || options.panic) && !process.stdout.isTTY) { + if (options.hooks) { + const validFormats = ['claude', 'kilo', 'codex']; + const fmt = validFormats.includes(options.hooks) ? 
options.hooks : 'claude'; + if (!validFormats.includes(options.hooks)) { + logger.warning(`Unknown hooks format "${options.hooks}" — defaulting to "claude"`); + } + await installPanicCheckHook(projectRoot, fmt); + await installGryphWatchHook(projectRoot); + } + if (options.panic !== undefined) { + const validModes: PanicResponseMode[] = ['off', 'observe', 'advisory', 'experimental_blocking']; + if (!validModes.includes(options.panic as PanicResponseMode)) { + logger.error(`Unknown panic mode "${options.panic}". Valid: ${validModes.join(', ')}`); + } else { + const cfg = await readOpenLoreConfig(projectRoot); + if (!cfg) { + logger.warning('No .openlore/config.json found — run openlore init first.'); + } else { + cfg.panicResponse = { mode: options.panic as PanicResponseMode }; + await writeOpenLoreConfig(projectRoot, cfg); + logger.success(`panic response mode set to "${options.panic}"`); + } + } + } + process.exit(0); + } + let tools: ToolName[]; if (options.tools) { tools = (options.tools.split(',').map((t) => t.trim()) as ToolName[]).filter((t) => @@ -427,6 +491,7 @@ export const setupCommand = new Command('setup') logger.warning(`Unknown hooks format "${options.hooks}" — defaulting to "claude"`); } await installPanicCheckHook(projectRoot, fmt); + await installGryphWatchHook(projectRoot); } // --panic flag: update panicResponse.mode in .openlore/config.json diff --git a/src/cli/index.ts b/src/cli/index.ts index cee2674e..c09dbd7b 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -26,6 +26,7 @@ import { digestCommand } from './commands/digest.js'; import { decisionsCommand } from './commands/decisions.js'; import { telemetryCommand } from './commands/telemetry.js'; import { panicCheckCommand } from './commands/panic-check.js'; +import { gryphWatchCommand } from './commands/gryph-watch.js'; import { configureLogger } from '../utils/logger.js'; // Read version from package.json at runtime so it never drifts from the published version @@ -137,5 +138,6 @@ 
program.addCommand(digestCommand); program.addCommand(decisionsCommand); program.addCommand(telemetryCommand); program.addCommand(panicCheckCommand); +program.addCommand(gryphWatchCommand); program.parse(); diff --git a/src/core/services/mcp-handlers/epistemic-lease.ts b/src/core/services/mcp-handlers/epistemic-lease.ts index 55bb94bf..5635a334 100644 --- a/src/core/services/mcp-handlers/epistemic-lease.ts +++ b/src/core/services/mcp-handlers/epistemic-lease.ts @@ -97,6 +97,8 @@ export interface EpistemicTracker { panicTriggers: string[]; /** Epoch ms — upward panic signals suppressed until this time after orient() recovery. */ panicRecoverySuppressionUntil: number; + /** Revision of the last panic-state.json write (from MCP or Gryph sync). Used for CAS monotonicity. */ + panicRevision: number; } // ============================================================================ @@ -455,6 +457,7 @@ export function createTracker(directory: string): EpistemicTracker { lastPanicUpdateAt: 0, panicTriggers: [], panicRecoverySuppressionUntil: 0, + panicRevision: 0, }; } @@ -767,5 +770,6 @@ export function trackerToPanicState(tracker: EpistemicTracker, agentId?: string, : undefined, agentId, sessionId, + revision: tracker.panicRevision, }; } diff --git a/src/core/services/mcp-handlers/gryph-bridge.test.ts b/src/core/services/mcp-handlers/gryph-bridge.test.ts index 5ce68d56..4f4ca429 100644 --- a/src/core/services/mcp-handlers/gryph-bridge.test.ts +++ b/src/core/services/mcp-handlers/gryph-bridge.test.ts @@ -48,6 +48,7 @@ function makeTracker(overrides: Partial = {}): EpistemicTracke lastPanicUpdateAt: 0, panicTriggers: [], panicRecoverySuppressionUntil: 0, + panicRevision: 0, ...overrides, } as EpistemicTracker; } diff --git a/src/core/services/mcp-handlers/gryph-bridge.ts b/src/core/services/mcp-handlers/gryph-bridge.ts index 18dd527f..7230f141 100644 --- a/src/core/services/mcp-handlers/gryph-bridge.ts +++ b/src/core/services/mcp-handlers/gryph-bridge.ts @@ -21,7 +21,7 @@ 
import { spawnSync, spawn } from 'node:child_process'; import { emit } from '../telemetry.js'; -import { readPanicState, writePanicState, applyPanicHysteresis } from './panic-response.js'; +import { readPanicState, writePanicState, casWritePanicState, applyPanicHysteresis } from './panic-response.js'; import type { PanicState, PanicLevel } from './panic-response.js'; import type { EpistemicTracker } from './epistemic-lease.js'; import { @@ -277,19 +277,27 @@ export interface GryphPollingOptions { provider?: RuntimeBehaviorProvider; } +/** One active poller per workspace directory — enforced by startGryphPolling. */ +const _pollerRegistry = new Map void>(); + /** * Start background Gryph polling. Returns a cleanup function (call on shutdown). * * Invariants: + * - One per workspace: registry stops any existing poller for the same directory * - Never overlaps: single-flight protection skips polls while previous is running * - Never blocks: async spawn, isolated from MCP execution path * - Never throws: all errors caught, fail-open - * - Syncs tracker: panicScore/panicLevel updated in-memory after file write so - * the MCP path doesn't overwrite Gryph-elevated state on the next tool call + * - CAS writes: uses compare-and-swap to prevent overwriting concurrent MCP writes + * - Syncs tracker: panicScore/panicLevel/panicRevision updated in-memory after write + * so the MCP path doesn't overwrite Gryph-elevated state on the next tool call */ export function startGryphPolling(opts: GryphPollingOptions): () => void { const { directory, getTracker, provider = new GryphBehaviorProvider() } = opts; + // Enforce one-per-workspace: stop any existing poller for this directory + _pollerRegistry.get(directory)?.(); + const intervalMs = Math.max( GRYPH_POLL_INTERVAL_MIN_MS, Number(process.env['OPENLORE_GRYPH_POLL_INTERVAL_MS'] ?? 
GRYPH_POLL_INTERVAL_MS), @@ -297,6 +305,7 @@ export function startGryphPolling(opts: GryphPollingOptions): () => void { let isPolling = false; let lastPollAt = new Date(Date.now() - intervalMs).toISOString(); + let stopped = false; const poll = async (): Promise => { if (isPolling) return; @@ -318,38 +327,51 @@ export function startGryphPolling(opts: GryphPollingOptions): () => void { // No actionable signals — skip state update if (!snapshot.repetitiveRetryBurst && !snapshot.largePatchWhileStale) return; - const state = readPanicState(directory); const tracker = getTracker(); const staleDepth = tracker?.staleDepth ?? 0; - const { newScore, newLevel, provenance } = applySnapshotDelta(snapshot, state, staleDepth); - if (newScore === state.panicScore && newLevel === state.panicLevel) return; - - const updatedState: PanicState = { - ...state, - panicScore: newScore, - panicLevel: newLevel, - updatedAt: new Date().toISOString(), - triggers: [...(state.triggers ?? []), ...provenance.map(p => p.name)], - }; - writePanicState(directory, updatedState); - - // Sync in-memory tracker so MCP path doesn't overwrite with stale score - if (tracker) { - tracker.panicScore = newScore; - tracker.panicLevel = newLevel as PanicLevel; + // CAS write with one retry on conflict (MCP may write between our read and write). + // All ops inside casWritePanicState are synchronous — atomic within the Node.js event loop. + let readState = readPanicState(directory); + let applyResult = applySnapshotDelta(snapshot, readState, staleDepth); + if (applyResult.newScore === readState.panicScore && applyResult.newLevel === readState.panicLevel) return; + + for (let attempt = 0; attempt < 2; attempt++) { + const candidate: PanicState = { + ...readState, + panicScore: applyResult.newScore, + panicLevel: applyResult.newLevel, + updatedAt: new Date().toISOString(), + triggers: [...(readState.triggers ?? 
[]), ...applyResult.provenance.map(p => p.name)], + }; + if (casWritePanicState(directory, readState.revision, candidate)) { + const writtenRevision = readState.revision + 1; + // Sync in-memory tracker so MCP path doesn't overwrite with stale state + if (tracker) { + tracker.panicScore = applyResult.newScore; + tracker.panicLevel = applyResult.newLevel as PanicLevel; + tracker.panicRevision = writtenRevision; + } + emit(directory, 'panic', { + event: 'panic_score_delta', + source: 'gryph', + delta: applyResult.newScore - readState.panicScore, + from_score: readState.panicScore, + to_score: applyResult.newScore, + from_level: readState.panicLevel, + to_level: applyResult.newLevel, + provenance: applyResult.provenance, + }); + return; + } + // Conflict on first attempt — re-read and retry once + if (attempt === 0) { + readState = readPanicState(directory); + applyResult = applySnapshotDelta(snapshot, readState, staleDepth); + if (applyResult.newScore === readState.panicScore && applyResult.newLevel === readState.panicLevel) return; + } } - - emit(directory, 'panic', { - event: 'panic_score_delta', - source: 'gryph', - delta: newScore - state.panicScore, - from_score: state.panicScore, - to_score: newScore, - from_level: state.panicLevel, - to_level: newLevel, - provenance, - }); + // Both CAS attempts failed — skip this poll cycle, try again next interval } catch { // fail-open: no error propagates } finally { @@ -357,8 +379,22 @@ export function startGryphPolling(opts: GryphPollingOptions): () => void { } }; - const handle = setInterval(() => { void poll(); }, intervalMs); - return () => clearInterval(handle); + // While loop: sleep-before-poll preserves "first poll after one interval" semantics. + // Sequential await eliminates setInterval's timer drift and stop lifecycle races. 
+ const run = async (): Promise => { + while (!stopped) { + await new Promise(r => setTimeout(r, intervalMs)); + if (!stopped) await poll(); + } + }; + void run(); + + const stop = (): void => { + stopped = true; + _pollerRegistry.delete(directory); + }; + _pollerRegistry.set(directory, stop); + return stop; } // ============================================================================ diff --git a/src/core/services/mcp-handlers/panic-response.test.ts b/src/core/services/mcp-handlers/panic-response.test.ts index bbdbe633..b585a49f 100644 --- a/src/core/services/mcp-handlers/panic-response.test.ts +++ b/src/core/services/mcp-handlers/panic-response.test.ts @@ -15,6 +15,7 @@ import { defaultPanicState, readPanicState, writePanicState, + casWritePanicState, buildPanicCheckOutput, getPanicSignalText, } from './panic-response.js'; diff --git a/src/core/services/mcp-handlers/panic-response.ts b/src/core/services/mcp-handlers/panic-response.ts index e65fd812..fb82211b 100644 --- a/src/core/services/mcp-handlers/panic-response.ts +++ b/src/core/services/mcp-handlers/panic-response.ts @@ -41,6 +41,8 @@ export interface PanicState { panicRecoverySuppressionUntil?: string; agentId?: string; sessionId?: string; + /** Monotonically increasing write counter. Used for CAS by concurrent writers (Gryph poll vs MCP). */ + revision: number; } export interface PanicCheckOutput { @@ -98,6 +100,7 @@ export function defaultPanicState(): PanicState { localityConfidence: 0, interventionCountSinceStable: 0, triggers: [], + revision: 0, }; } @@ -121,7 +124,7 @@ export function readPanicState(directory: string): PanicState { if (age > PANIC_SESSION_EXPIRY_MS) return defaultPanicState(); } - return { ...defaultPanicState(), ...parsed, schemaVersion: 1 }; + return { ...defaultPanicState(), ...parsed, schemaVersion: 1, revision: parsed.revision ?? 
0 }; } catch { return defaultPanicState(); } @@ -129,16 +132,50 @@ export function readPanicState(directory: string): PanicState { /** * Atomically writes panic state. POSIX rename(2) is atomic on same filesystem. + * Bumps revision on every write — callers sync their own revision counter from the return value. * Never throws — must not crash the hot path. + * Returns the new revision written (or the existing revision if write failed). */ -export function writePanicState(directory: string, state: PanicState): void { +export function writePanicState(directory: string, state: PanicState): number { + const newRevision = (state.revision ?? 0) + 1; try { const path = join(directory, OPENLORE_DIR, PANIC_STATE_FILE); const tmp = `${path}.tmp`; - writeFileSync(tmp, JSON.stringify(state, null, 2), 'utf-8'); + writeFileSync(tmp, JSON.stringify({ ...state, revision: newRevision }, null, 2), 'utf-8'); renameSync(tmp, path); + return newRevision; } catch { // never crash the hot path + return state.revision ?? 0; + } +} + +/** + * Compare-and-swap write for concurrent writers (Gryph poll path). + * All ops are synchronous — no await between read and write — so this is atomic + * within the Node.js event loop (no interleaving at JS level). + * Returns false if on-disk revision !== expectedRevision (stale read → caller retries). + */ +export function casWritePanicState( + directory: string, + expectedRevision: number, + state: PanicState, +): boolean { + try { + const path = join(directory, OPENLORE_DIR, PANIC_STATE_FILE); + const currentRevision = existsSync(path) + ? (() => { + try { return (JSON.parse(readFileSync(path, 'utf-8')) as Partial).revision ?? 
0; } + catch { return 0; } + })() + : 0; + if (currentRevision !== expectedRevision) return false; + const tmp = `${path}.tmp`; + writeFileSync(tmp, JSON.stringify({ ...state, revision: expectedRevision + 1 }, null, 2), 'utf-8'); + renameSync(tmp, path); + return true; + } catch { + return false; } } From e26a55133bd2a13437d99dc1f4c995651456b2cd Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Sat, 23 May 2026 12:42:56 +0200 Subject: [PATCH 19/22] fix(gryph): resolve binary via common paths when PATH is restricted Hook environments (nohup, UserPromptSubmit) often strip PATH so `which gryph` fails even when the binary exists at ~/.local/bin/gryph. Falls back to checking ~/.local/bin, ~/go/bin, /usr/local/bin, /opt/homebrew/bin before giving up. Stores resolved path in _gryphBin so spawns use the absolute path, not 'gryph'. Co-Authored-By: Claude Sonnet 4.6 --- .../services/mcp-handlers/gryph-bridge.ts | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/core/services/mcp-handlers/gryph-bridge.ts b/src/core/services/mcp-handlers/gryph-bridge.ts index 7230f141..6c730b9c 100644 --- a/src/core/services/mcp-handlers/gryph-bridge.ts +++ b/src/core/services/mcp-handlers/gryph-bridge.ts @@ -20,6 +20,7 @@ */ import { spawnSync, spawn } from 'node:child_process'; +import { existsSync } from 'node:fs'; import { emit } from '../telemetry.js'; import { readPanicState, writePanicState, casWritePanicState, applyPanicHysteresis } from './panic-response.js'; import type { PanicState, PanicLevel } from './panic-response.js'; @@ -122,15 +123,34 @@ function computeCommandEntropy(commands: string[]): number { // ============================================================================ let _gryphAvailable: boolean | undefined; +let _gryphBin = 'gryph'; function isGryphAvailable(): boolean { if (_gryphAvailable !== undefined) return _gryphAvailable; + // Try PATH-resolution first (fast, works in interactive shells) const result = 
spawnSync('which', ['gryph'], { timeout: GRYPH_DETECT_TIMEOUT_MS, stdio: ['ignore', 'pipe', 'ignore'], }); - _gryphAvailable = result.status === 0 && Boolean(result.stdout?.toString().trim()); - return _gryphAvailable; + const fromPath = result.status === 0 ? result.stdout?.toString().trim() : ''; + if (fromPath) { + _gryphBin = fromPath; + _gryphAvailable = true; + return true; + } + // Fallback: check common install locations (hook environments often have restricted PATH) + const home = process.env['HOME'] ?? ''; + const candidates = [ + `${home}/.local/bin/gryph`, + `${home}/go/bin/gryph`, + '/usr/local/bin/gryph', + '/opt/homebrew/bin/gryph', + ]; + for (const p of candidates) { + if (existsSync(p)) { _gryphBin = p; _gryphAvailable = true; return true; } + } + _gryphAvailable = false; + return false; } // ============================================================================ @@ -140,7 +160,7 @@ function isGryphAvailable(): boolean { /** Synchronous query — used by the backward-compat panic-check enrichment path. 
*/ function queryGryphSync(action: 'exec' | 'write', since: string): unknown[] { const result = spawnSync( - 'gryph', + _gryphBin, ['query', '--format', 'json', '--action', action, '--since', since], { timeout: GRYPH_TIMEOUT_MS, stdio: ['ignore', 'pipe', 'ignore'], encoding: 'utf-8' }, ); @@ -157,7 +177,7 @@ function queryGryphSync(action: 'exec' | 'write', since: string): unknown[] { async function queryGryphAsync(action: 'exec' | 'write', since: string): Promise { return new Promise((resolve) => { const child = spawn( - 'gryph', + _gryphBin, ['query', '--format', 'json', '--action', action, '--since', since], { stdio: ['ignore', 'pipe', 'ignore'] }, ); From 00ba626242ad7afeb3937355c18ae45a08a159f4 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Sat, 23 May 2026 14:51:04 +0200 Subject: [PATCH 20/22] fix(gryph): correct field casing and burst detection for real Gryph schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gryph events use PascalCase (Command, ExitCode, ResultStatus, LinesAdded) but code mapped lowercase fields — commands array was always empty, entropy always 1, repetitiveRetryBurst never fired. Two fixes: 1. Interface and mappings now use PascalCase with lowercase fallbacks 2. Burst detection adds OR path: failingCommandRate > 0.30 triggers regardless of entropy — catches mixed-window scenarios where diagnostic commands dilute the entropy signal below the 0.30 threshold Verified end-to-end: panic_score_delta events firing with correct provenance. 
Co-Authored-By: Claude Sonnet 4.6 --- .../services/mcp-handlers/gryph-bridge.ts | 45 ++++++++++++++----- .../services/mcp-handlers/panic-constants.ts | 3 ++ 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/core/services/mcp-handlers/gryph-bridge.ts b/src/core/services/mcp-handlers/gryph-bridge.ts index 6c730b9c..4dc75dee 100644 --- a/src/core/services/mcp-handlers/gryph-bridge.ts +++ b/src/core/services/mcp-handlers/gryph-bridge.ts @@ -33,6 +33,7 @@ import { GRYPH_LARGE_PATCH_LOC_THRESHOLD, GRYPH_ENTROPY_LOW_THRESHOLD, GRYPH_ENTROPY_HIGH_THRESHOLD, + GRYPH_FAILING_RATE_THRESHOLD, GRYPH_POLL_INTERVAL_MS, GRYPH_POLL_INTERVAL_MIN_MS, } from './panic-constants.js'; @@ -66,17 +67,26 @@ export interface GryphSignals { } interface GryphExecEvent { - timestamp?: string; - action?: string; + // PascalCase — actual Gryph schema + Command?: string; + ExitCode?: number; + ResultStatus?: string; + Timestamp?: string; + // snake_case / camelCase — kept for custom/future sources command?: string; cmd?: string; exit_code?: number; exitCode?: number; + result_status?: string; } interface GryphWriteEvent { - timestamp?: string; - action?: string; + // PascalCase — actual Gryph schema + Path?: string; + LinesAdded?: number; + LinesRemoved?: number; + Timestamp?: string; + // snake_case / camelCase — kept for custom/future sources path?: string; file?: string; lines?: number; @@ -256,16 +266,24 @@ export class GryphBehaviorProvider implements RuntimeBehaviorProvider { ]); const commands = (execEvents as GryphExecEvent[]) - .map(e => e.command ?? e.cmd ?? '') + .map(e => e.Command ?? e.command ?? e.cmd ?? '') .filter(Boolean); const commandEntropy = computeCommandEntropy(commands); const failingCount = (execEvents as GryphExecEvent[]) - .filter(e => (e.exit_code ?? e.exitCode ?? 0) !== 0).length; + .filter(e => { + const status = e.ResultStatus ?? e.result_status; + return status === 'error' || (e.ExitCode ?? e.exit_code ?? e.exitCode ?? 
0) !== 0; + }).length; const failingCommandRate = execEvents.length > 0 ? failingCount / execEvents.length : 0; - const repetitiveRetryBurst = commandEntropy < GRYPH_ENTROPY_LOW_THRESHOLD && failingCount > 0; - - const locs = (writeEvents as GryphWriteEvent[]).map(e => e.lines ?? e.loc ?? e.additions ?? 0); + // Low entropy + any failure (pure retry loop) OR high failure rate regardless of entropy + const repetitiveRetryBurst = + (commandEntropy < GRYPH_ENTROPY_LOW_THRESHOLD && failingCount > 0) || + failingCommandRate > GRYPH_FAILING_RATE_THRESHOLD; + + const locs = (writeEvents as GryphWriteEvent[]).map( + e => e.LinesAdded ?? e.lines ?? e.loc ?? e.additions ?? 0, + ); const maxLoc = locs.length > 0 ? Math.max(...locs) : 0; return { @@ -432,12 +450,15 @@ export function queryGryphSignals(since: string): GryphSignals | null { const execEvents = queryGryphSync('exec', since) as GryphExecEvent[]; const writeEvents = queryGryphSync('write', since) as GryphWriteEvent[]; - const commands = execEvents.map(e => e.command ?? e.cmd ?? '').filter(Boolean); + const commands = execEvents.map(e => e.Command ?? e.command ?? e.cmd ?? '').filter(Boolean); const commandEntropy = computeCommandEntropy(commands); - const hasFailures = execEvents.some(e => (e.exit_code ?? e.exitCode ?? 0) !== 0); + const hasFailures = execEvents.some(e => { + const status = e.ResultStatus ?? e.result_status; + return status === 'error' || (e.ExitCode ?? e.exit_code ?? e.exitCode ?? 0) !== 0; + }); const repetitiveRetryBurst = commandEntropy < GRYPH_ENTROPY_LOW_THRESHOLD && hasFailures; - const locs = writeEvents.map(e => e.lines ?? e.loc ?? e.additions ?? 0); + const locs = writeEvents.map(e => e.LinesAdded ?? e.lines ?? e.loc ?? e.additions ?? 0); const largePatchLoc = locs.length > 0 ? 
Math.max(...locs) : 0; const largePatchWhileStale = largePatchLoc > GRYPH_LARGE_PATCH_LOC_THRESHOLD; diff --git a/src/core/services/mcp-handlers/panic-constants.ts b/src/core/services/mcp-handlers/panic-constants.ts index 84f57653..5c9ccc7d 100644 --- a/src/core/services/mcp-handlers/panic-constants.ts +++ b/src/core/services/mcp-handlers/panic-constants.ts @@ -104,6 +104,9 @@ export const GRYPH_ENTROPY_LOW_THRESHOLD = 0.30; /** Command entropy above this = deliberate exploratory work (attenuation gate). */ export const GRYPH_ENTROPY_HIGH_THRESHOLD = 0.60; +/** Failure rate above this triggers burst signal regardless of entropy (mixed-window robustness). */ +export const GRYPH_FAILING_RATE_THRESHOLD = 0.30; + // ============================================================================ // GRYPH POLLING // ============================================================================ From 00ec819a168e99d64b05dfa852d564c9ab85027c Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Sat, 23 May 2026 22:13:15 +0200 Subject: [PATCH 21/22] fix(panic): drop unread panic-response.jsonl emit, fix kilo double-prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit panic_mode_active was written to panic-response.jsonl which telemetry never reads — silent data loss. Mode is always derivable from config. kilo format was prepending [PANIC:SEVERITY] before a message that already contained its own tag, producing double-prefix output. --- src/cli/commands/mcp.ts | 3 --- src/cli/commands/panic-check.ts | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts index 491ba254..ddfd18d1 100644 --- a/src/cli/commands/mcp.ts +++ b/src/cli/commands/mcp.ts @@ -1360,9 +1360,6 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { trackerDir = directory; const cfg = await readOpenLoreConfig(directory); panicPolicy = cfg?.panicResponse?.mode ?? 
'off'; - if (panicPolicy !== 'off') { - emit(directory, 'panic-response', { event: 'panic_mode_active', mode: panicPolicy }); - } } // Update epistemic state before dispatch (orient resets tracker internally). // Invariant: only MCP tool calls (this path) feed panic. CLI commands (panic-check, diff --git a/src/cli/commands/panic-check.ts b/src/cli/commands/panic-check.ts index 8e707eb3..617cface 100644 --- a/src/cli/commands/panic-check.ts +++ b/src/cli/commands/panic-check.ts @@ -102,5 +102,5 @@ function formatOutput(output: ReturnType, format: // kilo: plain-text message (some runtimes just want a string signal) if (output.decision === 'allow') return ''; - return `[PANIC:${output.severity?.toUpperCase() ?? 'WARN'}] ${output.message ?? 'Destabilization detected — call orient().'}`; + return output.message ?? `[PANIC:${output.severity?.toUpperCase() ?? 'WARN'}] Destabilization detected — call orient().`; } From e490d9ef8e8d12112de4cdf28f88c3b77ef24973 Mon Sep 17 00:00:00 2001 From: Laurent FRANCOISE Date: Sat, 23 May 2026 22:29:54 +0200 Subject: [PATCH 22/22] feat(panic): decay in Gryph path, gryphWindowStart window, intervention feedback loop, panic-level statusline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Decay in Gryph path — applySnapshotDelta now applies passive wall-clock decay (PANIC_DECAY_PER_MIN) before adding Gryph signals. Score could previously only go up via Gryph while MCP was idle. 2. gryphWindowStart — new PanicState field separates the Gryph query window from lastOrientAt (which could be hours old). panic-check hook now queries only the ~2min window since last intervention, eliminating replaying hours of history. Advanced on each warn write. 3. Cooldown preservation — mcp.ts now reads disk state before writing to preserve lastHookInterventionAt and gryphWindowStart set by the panic-check process. Fixes broken cooldown: these fields were silently wiped on every MCP tool call. 4. 
Intervention feedback loop — mcp.ts detects orient() calls and emits panic_intervention_outcome event when orient followed a hook intervention within 5 minutes. Closes the measurement gap on whether interventions actually produce behavioral change. 5. panic-level command — new read-only `openlore panic-level` outputs "P:L{n}" (empty at L0) for status line integration. No side effects. Configure: openlore setup installs hooks separately. --- src/cli/commands/mcp.ts | 30 +++++++++++++++++-- src/cli/commands/panic-check.ts | 8 +++-- src/cli/commands/panic-level.ts | 27 +++++++++++++++++ src/cli/index.ts | 2 ++ .../mcp-handlers/gryph-bridge.test.ts | 11 +++++-- .../services/mcp-handlers/gryph-bridge.ts | 23 ++++++++++++-- .../services/mcp-handlers/panic-response.ts | 2 ++ 7 files changed, 91 insertions(+), 12 deletions(-) create mode 100644 src/cli/commands/panic-level.ts diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts index ddfd18d1..901c43cc 100644 --- a/src/cli/commands/mcp.ts +++ b/src/cli/commands/mcp.ts @@ -34,7 +34,7 @@ import { sanitizeMcpError, validateDirectory } from '../../core/services/mcp-han import { createTracker, updateTracker, updatePanic, getFreshnessSignal, trackerToPanicState } from '../../core/services/mcp-handlers/epistemic-lease.js'; import type { EpistemicTracker } from '../../core/services/mcp-handlers/epistemic-lease.js'; import type { PanicResponseMode } from '../../types/index.js'; -import { writePanicState, getPanicSignalText } from '../../core/services/mcp-handlers/panic-response.js'; +import { readPanicState, writePanicState, getPanicSignalText } from '../../core/services/mcp-handlers/panic-response.js'; import { emit } from '../../core/services/telemetry.js'; import { readOpenLoreConfig } from '../../core/services/config-manager.js'; import { DEFAULT_DRIFT_MAX_FILES } from '../../constants.js'; @@ -1366,17 +1366,41 @@ async function startMcpServer(options: McpServerOptions = {}): Promise { // telemetry) are separate 
processes that read state but never call updateTracker — // no recursive panic feedback loop from openlore internal commands. if (tracker && directory) { + const prevOrientResetAt = tracker.lastOrientResetAt; updateTracker(tracker, name, directory, typeof filePath === 'string' ? filePath : undefined); + const orientJustFired = tracker.lastOrientResetAt !== prevOrientResetAt; + if (panicPolicy !== 'off') { + // Read disk state to preserve hook-written fields (lastHookInterventionAt, gryphWindowStart) + // that panic-check (separate process) may have set since the last MCP write. + const diskState = readPanicState(directory); updatePanic(tracker, { density: tracker.density, oscillation: tracker.oscillation, - weight: 1, // weight read from TOOL_WEIGHTS inside updatePanic via opts — set baseline here + weight: 1, staleDepth: tracker.staleDepth, directory, tool: name, }); - tracker.panicRevision = writePanicState(directory, trackerToPanicState(tracker, agentName)); + const stateToWrite = { + ...trackerToPanicState(tracker, agentName), + lastHookInterventionAt: diskState.lastHookInterventionAt, + gryphWindowStart: diskState.gryphWindowStart, + }; + tracker.panicRevision = writePanicState(directory, stateToWrite); + + // Feedback loop: did orient() respond to a prior hook intervention? + if (orientJustFired && diskState.lastHookInterventionAt) { + const lagMs = Date.now() - new Date(diskState.lastHookInterventionAt).getTime(); + if (lagMs < 5 * 60 * 1000) { + emit(directory, 'panic', { + event: 'panic_intervention_outcome', + outcome: 'responded', + intervention_lag_ms: lagMs, + orient_kind: tracker.recentOrientCount >= 3 ? 'spam' : tracker.recentOrientCount >= 2 ? 
'rapid' : 'normal', + }); + } + } } } diff --git a/src/cli/commands/panic-check.ts b/src/cli/commands/panic-check.ts index 617cface..d0dad572 100644 --- a/src/cli/commands/panic-check.ts +++ b/src/cli/commands/panic-check.ts @@ -37,8 +37,8 @@ export const panicCheckCommand = new Command('panic-check') let state = readPanicState(dir); - // Gryph enrichment — fail-open, query from lastOrientAt (or 15min ago if absent) - const since = state.lastOrientAt ?? new Date(Date.now() - 15 * 60 * 1000).toISOString(); + // Gryph enrichment — query from gryphWindowStart (2-min fallback avoids replaying hours of history) + const since = state.gryphWindowStart ?? new Date(Date.now() - 2 * 60 * 1000).toISOString(); const gryphSignals = queryGryphSignals(since); if (gryphSignals) { const enrichedTriggers = [...state.triggers]; @@ -61,9 +61,11 @@ export const panicCheckCommand = new Command('panic-check') if (output.decision === 'warn') { const newCount = state.interventionCountSinceStable + 1; + const now = new Date().toISOString(); writePanicState(dir, { ...state, - lastHookInterventionAt: new Date().toISOString(), + lastHookInterventionAt: now, + gryphWindowStart: now, interventionCountSinceStable: newCount, }); emit(dir, 'panic', { diff --git a/src/cli/commands/panic-level.ts b/src/cli/commands/panic-level.ts new file mode 100644 index 00000000..31ac8ae4 --- /dev/null +++ b/src/cli/commands/panic-level.ts @@ -0,0 +1,27 @@ +/** + * openlore panic-level + * + * Read-only status line output: current panic level as a compact string. + * No side effects, no writes — safe to call from a status line poller. + * + * Output: "P:L{n}" at L1–L4, empty string at L0. + * Exit: always 0. 
+ */ + +import { Command } from 'commander'; +import { readPanicState } from '../../core/services/mcp-handlers/panic-response.js'; + +export const panicLevelCommand = new Command('panic-level') + .description('Output current panic level for status line display (read-only, exits 0)') + .option('-d, --directory ', 'Project directory', process.cwd()) + .action((options: { directory: string }) => { + try { + const state = readPanicState(options.directory); + if (state.panicLevel > 0) { + process.stdout.write(`P:L${state.panicLevel}`); + } + } catch { + // fail-open: output nothing + } + process.exit(0); + }); diff --git a/src/cli/index.ts b/src/cli/index.ts index c09dbd7b..0357f25c 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -26,6 +26,7 @@ import { digestCommand } from './commands/digest.js'; import { decisionsCommand } from './commands/decisions.js'; import { telemetryCommand } from './commands/telemetry.js'; import { panicCheckCommand } from './commands/panic-check.js'; +import { panicLevelCommand } from './commands/panic-level.js'; import { gryphWatchCommand } from './commands/gryph-watch.js'; import { configureLogger } from '../utils/logger.js'; @@ -138,6 +139,7 @@ program.addCommand(digestCommand); program.addCommand(decisionsCommand); program.addCommand(telemetryCommand); program.addCommand(panicCheckCommand); +program.addCommand(panicLevelCommand); program.addCommand(gryphWatchCommand); program.parse(); diff --git a/src/core/services/mcp-handlers/gryph-bridge.test.ts b/src/core/services/mcp-handlers/gryph-bridge.test.ts index 4f4ca429..6f12b5e8 100644 --- a/src/core/services/mcp-handlers/gryph-bridge.test.ts +++ b/src/core/services/mcp-handlers/gryph-bridge.test.ts @@ -14,6 +14,7 @@ import { startGryphPolling, applyGryphDelta, queryGryphSignals, + _resetGryphAvailabilityForTesting, } from './gryph-bridge.js'; import type { RuntimeBehaviorProvider, RuntimeBehaviorSnapshot } from './gryph-bridge.js'; import { readPanicState, writePanicState, 
defaultPanicState } from './panic-response.js'; @@ -23,6 +24,7 @@ import { GRYPH_LARGE_PATCH_LOW_ENTROPY_DELTA, GRYPH_LARGE_PATCH_HIGH_ENTROPY_DELTA, GRYPH_POLL_INTERVAL_MS, + PANIC_DECAY_PER_MIN, } from './panic-constants.js'; // ============================================================================ @@ -146,7 +148,7 @@ describe('GryphBehaviorProvider', () => { describe('queryGryphSignals', () => { it('returns null when gryph unavailable', () => { - // No mock needed — gryph is not installed in test env + _resetGryphAvailabilityForTesting(false); const result = queryGryphSignals(new Date().toISOString()); expect(result).toBeNull(); }); @@ -364,8 +366,11 @@ describe('startGryphPolling', () => { await vi.advanceTimersByTimeAsync(GRYPH_POLL_INTERVAL_MS); stop(); + // Second poll applies decay for time elapsed since first poll (GRYPH_POLL_INTERVAL_MS). + const decayPerPoll = Math.floor((GRYPH_POLL_INTERVAL_MS / 60_000) * PANIC_DECAY_PER_MIN); + const expected = GRYPH_RETRY_BURST_DELTA * 2 - decayPerPoll; const state = readPanicState(dir); - expect(state.panicScore).toBe(GRYPH_RETRY_BURST_DELTA * 2); - expect(tracker.panicScore).toBe(GRYPH_RETRY_BURST_DELTA * 2); + expect(state.panicScore).toBe(expected); + expect(tracker.panicScore).toBe(expected); }); }); diff --git a/src/core/services/mcp-handlers/gryph-bridge.ts b/src/core/services/mcp-handlers/gryph-bridge.ts index 4dc75dee..d727ffc8 100644 --- a/src/core/services/mcp-handlers/gryph-bridge.ts +++ b/src/core/services/mcp-handlers/gryph-bridge.ts @@ -34,6 +34,7 @@ import { GRYPH_ENTROPY_LOW_THRESHOLD, GRYPH_ENTROPY_HIGH_THRESHOLD, GRYPH_FAILING_RATE_THRESHOLD, + PANIC_DECAY_PER_MIN, GRYPH_POLL_INTERVAL_MS, GRYPH_POLL_INTERVAL_MIN_MS, } from './panic-constants.js'; @@ -135,6 +136,12 @@ function computeCommandEntropy(commands: string[]): number { let _gryphAvailable: boolean | undefined; let _gryphBin = 'gryph'; +/** Reset availability cache — for testing only. 
*/ +export function _resetGryphAvailabilityForTesting(available = false): void { + _gryphAvailable = available; + _gryphBin = 'gryph'; +} + function isGryphAvailable(): boolean { if (_gryphAvailable !== undefined) return _gryphAvailable; // Try PATH-resolution first (fast, works in interactive shells) @@ -217,8 +224,18 @@ function applySnapshotDelta( state: PanicState, staleDepth: number, ): SnapshotDeltaResult { - let delta = 0; + const now = Date.now(); + const elapsedMin = state.updatedAt + ? Math.max(0, (now - new Date(state.updatedAt).getTime()) / 60_000) + : 0; + const decayDelta = -Math.floor(elapsedMin * PANIC_DECAY_PER_MIN); + + let delta = decayDelta; const provenance: SnapshotDeltaResult['provenance'] = []; + if (decayDelta < 0) { + provenance.push({ name: 'passive_decay', delta: decayDelta, evidence: { elapsed_min: Math.round(elapsedMin * 100) / 100 } }); + } + const isStale = staleDepth >= 2; if (snapshot.repetitiveRetryBurst) { @@ -242,8 +259,8 @@ function applySnapshotDelta( }); } - if (delta === 0) { - return { newScore: state.panicScore, newLevel: state.panicLevel, provenance }; + if (delta === 0 || (delta === decayDelta && state.panicScore === 0)) { + return { newScore: state.panicScore, newLevel: state.panicLevel, provenance: [] }; } const newScore = Math.min(PANIC_SCORE_MAX, Math.max(0, state.panicScore + delta)); diff --git a/src/core/services/mcp-handlers/panic-response.ts b/src/core/services/mcp-handlers/panic-response.ts index fb82211b..be18ab21 100644 --- a/src/core/services/mcp-handlers/panic-response.ts +++ b/src/core/services/mcp-handlers/panic-response.ts @@ -39,6 +39,8 @@ export interface PanicState { triggers: string[]; /** ISO — upward signals suppressed until this timestamp after an orient() recovery. */ panicRecoverySuppressionUntil?: string; + /** ISO — start of the Gryph query window for the panic-check hook path. Advanced on each intervention write. 
*/ + gryphWindowStart?: string; agentId?: string; sessionId?: string; /** Monotonically increasing write counter. Used for CAS by concurrent writers (Gryph poll vs MCP). */