Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/plugin-validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,9 @@ jobs:

- name: Run validate-bp-contract self-tests (golden corpus + stable-ID E2E)
run: node tests/test-validate-bp-contract.mjs

- name: Run command-classifier regression suite (label-emit / _priority lock)
run: bash tests/test-command-classifier.sh

- name: Run classifier taxonomy runtime-sourcing tests (RFC-008 P3c, R4/F4)
run: bash tests/test-classifier-taxonomy-sync.sh
53 changes: 53 additions & 0 deletions install.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,13 @@ if (fs.existsSync(repoPatternsIndex)) {
console.log(`Installed patterns/_index.json to ${globalPatternsDir}`)
}

// NOTE: patterns/taxonomy.json is a RUNTIME dependency of command-classifier.sh,
// NOT a global-validation artifact like _index.json — so it is co-deployed WITH
// the classifier inside the `if (installHooks)` block (§5a-tax below), never
// unconditionally here. Deploying it in the main body would advance runtime
// candidate-1 on a no-hooks install while leaving an already-installed classifier
// stale + unwarned (PR-level codex BLOCKER; R4/F4 root parity + sync coupling).

// ---------------------------------------------------------------------------
// 2. Create local .episodic-memory in target project
// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -1202,6 +1209,52 @@ if (installHooks) {
)
}

// 5a-tax. RFC-008 P3c (R4/F4): co-deploy patterns/taxonomy.json to the SAME
// global root the classifier reads at runtime (candidate 1 =
// $HOME/.episodic-memory/patterns/taxonomy.json; GLOBAL_DIR = os.homedir()/
// .episodic-memory, no EPISODIC_MEMORY_HOME indirection — codex R2-P2 root
// parity). This is INSIDE the installHooks block so taxonomy and classifier
// advance together: a no-hooks install touches neither (PR-level codex
// BLOCKER — taxonomy must not advance candidate-1 while the installed
// classifier stays stale + unwarned).
const repoTaxonomy = path.join(REPO_DIR, 'patterns', 'taxonomy.json')
if (fs.existsSync(repoTaxonomy)) {
fs.mkdirSync(globalPatternsDir, { recursive: true })
fs.copyFileSync(repoTaxonomy, path.join(globalPatternsDir, 'taxonomy.json'))
console.log(`Installed patterns/taxonomy.json to ${globalPatternsDir}`)
}

// RFC-008 P3c (R4/F4, codex R1-P1b): if the command classifier was KEPT as a
// divergent local edit while taxonomy.json was (re)deployed just above, the
// installed classifier and the global taxonomy may disagree. Two cases,
// distinguished by whether the kept file carries the runtime-sourcing helper:
// pre-P3c (no _ensure_taxonomy_synced): runs stale hardcoded labels and is
// NOT taxonomy-synced — the gate is silently unprotected by
// runtime-sourcing (no fail-closed at all).
// post-P3c (has the helper): will FAIL CLOSED loudly on any drift until
// re-forced.
if (libResults['command-classifier.sh'] === 'skipped-divergent') {
let keptClassifier = ''
try {
keptClassifier = fs.readFileSync(
path.join(userHooksLibDir, 'command-classifier.sh'), 'utf8')
} catch { /* unreadable → treat as pre-P3c (no helper) below */ }
if (keptClassifier.includes('_ensure_taxonomy_synced')) {
console.log(
'WARNING: command-classifier.sh kept (divergent local edit) while ' +
'taxonomy.json was redeployed — the kept classifier will FAIL CLOSED ' +
'on any taxonomy drift. Re-run with --install-hooks-force to sync.'
)
} else {
console.log(
'WARNING: command-classifier.sh kept (divergent local edit) is pre-P3c ' +
'— it does NOT runtime-source taxonomy.json and is NOT taxonomy-synced ' +
'(runs stale hardcoded labels). Re-run with --install-hooks-force to ' +
'install runtime label-sourcing.'
)
}
}

// 5a. Hook specs imported from scripts/lib/install-manifest.mjs (single
// source of truth shared with tools/migration-cutover.mjs). Closes
// Codex round-2 implementation attention point: avoid a second
Expand Down
145 changes: 145 additions & 0 deletions plugins/claude-code/hooks/lib/command-classifier.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2400,6 +2400,129 @@ _resolve_marker_path() {
# cwd or a worktree. Codex R1 P1 reproduced marker miss for that
# divergence.
#
# ---------------------------------------------------------------------------
# Runtime taxonomy sourcing (RFC-008 P3c, maps to R4 / F4 / F6)
# ---------------------------------------------------------------------------
# The label vocabulary is single-sourced from patterns/taxonomy.json at runtime
# and fail-closed if it drifts from this classifier's own label authority. Per
# F4 (OQ-2 closed) this eliminates a hand-maintained bash label list "by
# construction"; the bash label authority is the _priority() case-arms (already
# CI-validated == taxonomy.labels by validate-bp-contract.mjs assertion 7b), so
# the runtime check compares taxonomy.labels against _priority arms — NO second
# parallel list (adversarial GAP-1).
#
# Resolution (codex R2-P1 / R2-P2):
# candidate 1: $HOME/.episodic-memory/patterns/taxonomy.json — the SAME root
# install.mjs writes (GLOBAL_DIR, install.mjs:24) and the idiom
# the hooks already use (checkpoint-gate.sh:741). No
# EPISODIC_MEMORY_HOME indirection (os.homedir() wouldn't honor
# it); tests isolate via HOME.
# candidate 2: in-repo copy via the $BASH_SOURCE climb, used ONLY when the
# climbed root PROVES it is the repo copy (repo sentinels present
# AND the classifier path round-trips). The installed layout
# (~/.claude/hooks/lib) fails the predicate, so it can never read
# an ambient parent patterns/taxonomy.json (authority-root
# containment).
# No EM_TAXONOMY_PATH env override (codex R1-P1: command-local env taxonomy
# authority is a bypass vector, PR-271).
#
# Fail-closed surfaces via a blocking label (unsafe_complex) with a distinct
# reason so logs disambiguate misconfig from a dangerous command. marker_write
# is NEVER fail-closed (deadlock-class-1 escape hatch, taxonomy.json
# non_overridable_rationale): classify_command exempts marker_write and
# classify_path's marker cases return before the guard.

# Per-process cache for _ensure_taxonomy_synced. All three are plain,
# NON-exported shell variables (adversarial axis-7: a child re-sources +
# re-validates rather than inheriting a stale "passed" flag).
#   _TAXONOMY_SYNC_DONE   — empty until the first check runs, then "1".
#   _TAXONOMY_SYNC_STATUS — "ok", "unresolved" or "drift" once checked.
#   _TAXONOMY_SYNC_REASON — "" when ok, else "taxonomy_unresolved" /
#                           "taxonomy_drift" (emitted as the classify reason).
_TAXONOMY_SYNC_DONE="" # set on first _ensure_taxonomy_synced call
_TAXONOMY_SYNC_STATUS="" # cached verdict of that call
_TAXONOMY_SYNC_REASON="" # cached failure reason of that call

# Physical (symlink-resolved) path of a file by resolving its DIRECTORY via
# `cd -P` (cross-platform; no GNU `readlink -f`). Resolves the /var→/private/var
# class that fail-opened P3b-1's isMain check.
#
# Args:    $1 — path to a file (absolute or relative; the file itself need not
#               exist, but its directory must).
# Output:  "<physical-dir>/<basename>" on stdout, no trailing newline.
# Returns: 0 on success; 1 when the directory cannot be derived or entered.
#
# Only the directory component is resolved — a symlink in the final path
# component (the file itself) is NOT followed.
_taxonomy_file_realpath() {
  local f="$1" d b
  d="$(dirname "$f")" || return 1
  b="$(basename "$f")"
  # CDPATH is cleared for this one `cd`: with an ambient CDPATH a RELATIVE
  # directory can resolve somewhere else entirely, and `cd` then echoes the
  # chosen directory to stdout, which would be captured ahead of `pwd` and
  # corrupt the result.
  d="$(CDPATH= cd -P "$d" 2>/dev/null && pwd)" || return 1
  printf '%s/%s' "$d" "$b"
}

# The single bash label authority: the _priority() case-arm names, sorted and
# space-joined. declare -f is bash-native and formatting-stable for our regex
# (matches `<label>)` whether inline or on its own line; never matches `*)` or
# `case "$1" in`).
#
# Output: the arm names on stdout, space-separated, no trailing whitespace.
#
# The extraction and the sort run under LC_ALL=C. The taxonomy side of the
# comparison is ordered by JavaScript's default Array.prototype.sort (UTF-16
# code-unit order); a locale-aware `sort` may rank `_` differently relative to
# letters (e.g. `a_z` vs `ab`), which would make two identical label sets
# compare unequal and report a spurious drift. Byte order matches code-unit
# order for the ASCII `[a-z_]` label alphabet.
_priority_arm_labels() {
  declare -f _priority \
    | LC_ALL=C sed -n 's/^[[:space:]]*\([a-z][a-z_]*\)).*/\1/p' \
    | LC_ALL=C sort | tr '\n' ' ' | sed 's/[[:space:]]*$//'
}

# Echo the resolved taxonomy.json path, or empty + return 1 if none qualifies.
#
# Candidates are tried in order:
#   1. $HOME/.episodic-memory/patterns/taxonomy.json (the global install root).
#   2. <climbed>/patterns/taxonomy.json, where <climbed> is four directories
#      above this sourced file — accepted only when the repo-layout proof
#      below holds.
# Output:  the chosen path (physical when it can be resolved) on stdout.
# Returns: 0 when a candidate qualified, 1 otherwise (nothing printed).
_taxonomy_resolve_path() {
  # Candidate 1 — global install root (== install.mjs GLOBAL_DIR).
  local c1="$HOME/.episodic-memory/patterns/taxonomy.json"
  if [ -f "$c1" ]; then
    # Fall back to the lexical path if the directory cannot be resolved.
    _taxonomy_file_realpath "$c1" 2>/dev/null || printf '%s' "$c1"
    return 0
  fi
  # Candidate 2 — in-repo copy, CONDITIONAL on a repo-layout proof predicate.
  # CDPATH is cleared for the `cd` calls: BASH_SOURCE[0] is relative when the
  # file was sourced via a relative path, and an ambient CDPATH could then send
  # `cd` to an unrelated directory and make it echo that directory to stdout.
  local self_dir climbed
  self_dir="$(CDPATH= cd -P "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd)" || return 1
  climbed="$(CDPATH= cd -P "$self_dir/../../../.." 2>/dev/null && pwd)" || return 1
  # (a) repo sentinels present at the climbed root.
  [ -f "$climbed/patterns/taxonomy.schema.json" ] || return 1
  [ -f "$climbed/scripts/em-store.mjs" ] || return 1
  # (b) the climbed classifier path round-trips back to THIS sourced file.
  local rp_self rp_climbed
  rp_self="$(_taxonomy_file_realpath "${BASH_SOURCE[0]}" 2>/dev/null)" || return 1
  rp_climbed="$(_taxonomy_file_realpath "$climbed/plugins/claude-code/hooks/lib/command-classifier.sh" 2>/dev/null)" || return 1
  [ "$rp_self" = "$rp_climbed" ] || return 1
  local c2="$climbed/patterns/taxonomy.json"
  [ -f "$c2" ] || return 1
  _taxonomy_file_realpath "$c2" 2>/dev/null || printf '%s' "$c2"
  return 0
}

# Check, at most once per process, that the label ids in taxonomy.json equal
# the _priority() case-arm names. The verdict is cached in the
# _TAXONOMY_SYNC_* variables and replayed on later calls.
# Returns: 0 when the two label sets match; 1 when the taxonomy cannot be
#          located/read ("taxonomy_unresolved") or the sets differ
#          ("taxonomy_drift"). The reason is left in _TAXONOMY_SYNC_REASON.
_ensure_taxonomy_synced() {
  # Already evaluated in this process — replay the cached verdict.
  if [ -n "$_TAXONOMY_SYNC_DONE" ]; then
    if [ "$_TAXONOMY_SYNC_STATUS" = "ok" ]; then
      return 0
    fi
    return 1
  fi
  _TAXONOMY_SYNC_DONE=1

  local resolved
  resolved="$(_taxonomy_resolve_path)"
  if [ -z "$resolved" ] || [ ! -f "$resolved" ]; then
    _TAXONOMY_SYNC_STATUS="unresolved"
    _TAXONOMY_SYNC_REASON="taxonomy_unresolved"
    return 1
  fi

  # Read the label ids with a dependency-free node one-liner. The EXIT CODE is
  # what gets branched on (missing node, JSON parse error and a non-array
  # `labels` all exit non-zero) — empty stdout alone is not treated as failure.
  # The file is read with readFileSync + JSON.parse rather than require().
  local from_taxonomy
  from_taxonomy="$(node -e 'const fs=require("fs");const t=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));if(!Array.isArray(t.labels))process.exit(3);process.stdout.write(t.labels.map(l=>l.id).sort().join(" "))' "$resolved" 2>/dev/null)" || {
    _TAXONOMY_SYNC_STATUS="unresolved"
    _TAXONOMY_SYNC_REASON="taxonomy_unresolved"
    return 1
  }

  local from_priority
  from_priority="$(_priority_arm_labels)"
  if [ "$from_taxonomy" != "$from_priority" ]; then
    _TAXONOMY_SYNC_STATUS="drift"
    _TAXONOMY_SYNC_REASON="taxonomy_drift"
    return 1
  fi

  _TAXONOMY_SYNC_STATUS="ok"
  _TAXONOMY_SYNC_REASON=""
  return 0
}

# Output: LABEL\tTARGET\tREASON
classify_command() {
local cmd="$1"
Expand Down Expand Up @@ -2485,6 +2608,19 @@ classify_command() {
final_reason="empty_command"
fi

# RFC-008 P3c (R4/F4): fail-closed on taxonomy drift/unresolved. The guard
# NEVER overrides the two NON-OVERRIDABLE labels — marker_write (deadlock
# escape hatch) and unsafe_complex (already maximally blocking) — symmetric
# with taxonomy.json non_overridable. Everything else degrades to
# unsafe_complex so a possibly-mislabeled write cannot slip through as a read.
if [ "$final_label" != "marker_write" ] && [ "$final_label" != "unsafe_complex" ]; then
if ! _ensure_taxonomy_synced; then
final_label="unsafe_complex"
final_target=""
final_reason="$_TAXONOMY_SYNC_REASON"
fi
fi

printf '%s\t%s\t%s\n' "$final_label" "$final_target" "$final_reason"
}

Expand Down Expand Up @@ -2544,6 +2680,15 @@ classify_path() {
return 0
;;
esac
# RFC-008 P3c (R4/F4): fail-closed on taxonomy drift/unresolved for the
# non-marker write path. The marker cases above already returned (deadlock
# escape preserved). codex R1-P1: classify_path is the SECOND public label
# emitter (Write/Edit/MultiEdit/NotebookEdit via checkpoint-gate.sh:1359) and
# MUST front the same shared guard as classify_command.
if ! _ensure_taxonomy_synced; then
printf '%s\t\t%s\n' "unsafe_complex" "$_TAXONOMY_SYNC_REASON"
return 0
fi
printf '%s\t\t%s\n' "shared_write" "path_default"
return 0
}
Expand Down
111 changes: 110 additions & 1 deletion scripts/validate-bp-contract.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,12 @@ export function extractPriorityArms(text, relPath, violation) {
// keyword form `function _priority`.
if (/^\s*\(\s*\)/.test(after) || /\bfunction\s+$/.test(before)) { defIdx.push(i); continue; }
if (/\$\(\s*$/.test(before)) continue; // $( call site (this OCCURRENCE only — the rest of the line is still scanned)
// `declare -f _priority` (P3c runtime-sourcing) READS the function body;
// it cannot redefine it, so this OCCURRENCE is inert like a $(-call site.
// A trailing same-line redefinition (`declare -f _priority; _priority() {`)
// is still caught — its `() {` is a SEPARATE occurrence flagged as a def
// opener (→ defIdx > 1 fails). Allowlist only the `declare -<…f…> ` prefix.
if (/\bdeclare\s+-[A-Za-z]*f[A-Za-z]*\s+$/.test(before)) continue;
unproven.push(i + 1);
}
}
Expand Down Expand Up @@ -272,6 +278,106 @@ export function extractPriorityArms(text, relPath, violation) {
return arms;
}

/**
 * Assertion 7c (RFC-008 P3c — maps to R4 / F4 / F6). Verifies the default
 * classifier RUNTIME-SOURCES its label set from taxonomy.json. Robust parser,
 * not a plain grep (codex R1-P2: grep passes on comments / dead code):
 *   (a) `_ensure_taxonomy_synced` is defined exactly once (non-comment).
 *   (b) it is called (live, non-definition) from BOTH `classify_command` and
 *       `classify_path` bodies — the two public label emitters (codex R1-P1).
 *   (c) every emit-site label LITERAL (`printf '<fmt>' "<label>" …`) is a
 *       taxonomy label — F6 vocabulary closure over emit sites, INDEPENDENT of
 *       the `_priority` arms (7b); catches a typo'd `shared_writ` 7b can't see.
 * Body ranges are delimited by the next COLUMN-0 function opener so `${…}`
 * expansions and the nested `_consider()` helper don't perturb the scan.
 *
 * Line numbers in violations are 1-based PHYSICAL line numbers of `text`: a
 * statement spread over backslash-continued lines is reported at the line it
 * starts on.
 *
 * @param {string} text - Full source of the classifier shell script.
 * @param {string} relPath - Path used as the prefix of every violation message.
 * @param {Set<string>} labelIds - Taxonomy label ids (only `.has()` is used).
 * @param {(id: string, detail: string) => void} violation - Sink invoked once
 *   per failed check; always called with id "7".
 * @returns {boolean} true when (a), (b) and (c) all pass, false otherwise.
 */
export function checkTaxonomySourcing(text, relPath, labelIds, violation) {
  // Comment-aware logical-line assembly (mirror of extractPriorityArms).
  // Backslash-continued physical lines are folded into one logical line
  // (comment lines are never folded). `startLineOf[k]` remembers the physical
  // line logical line k begins on, so diagnostics do not drift after a fold.
  const phys = text.split(/\r?\n/);
  const lines = [];
  const startLineOf = [];
  for (let i = 0; i < phys.length; i++) {
    const firstPhysLine = i + 1;
    let cur = phys[i];
    if (!/^\s*#/.test(cur)) {
      while (cur.endsWith("\\") && i + 1 < phys.length) { cur = cur.slice(0, -1) + phys[++i]; }
    }
    lines.push(cur);
    startLineOf.push(firstPhysLine);
  }
  const isComment = (l) => l.trim().startsWith("#");
  const GUARD = "_ensure_taxonomy_synced";
  const GUARD_DEF_RE = /^\s*(?:function\s+)?_ensure_taxonomy_synced\s*\(\s*\)\s*\{/;
  const GUARD_TOKEN_RE = /\b_ensure_taxonomy_synced\b/;
  // Column-0 `name() {` opener; group 1 is the function name. Not global, so
  // `.test()` / `.exec()` carry no lastIndex state between lines.
  const FN_OPEN_RE = /^([A-Za-z_][A-Za-z0-9_]*)\s*\(\s*\)\s*\{/;
  let ok = true;

  // (a) exactly one non-comment definition.
  const defLines = [];
  for (let i = 0; i < lines.length; i++) {
    if (!isComment(lines[i]) && GUARD_DEF_RE.test(lines[i])) defLines.push(i);
  }
  if (defLines.length === 0) {
    violation("7", `${relPath}: no ${GUARD}() definition — R4/F4 require the default classifier to runtime-source taxonomy.json (fail-closed)`);
    ok = false;
  } else if (defLines.length > 1) {
    violation("7", `${relPath}: ${defLines.length} ${GUARD}() definitions (expected exactly one) — bash last-wins ambiguity (fail-closed)`);
    ok = false;
  }

  // (b) live call from BOTH public entry-point bodies.
  // A body runs from its opener to the line before the next column-0 opener
  // (or to end of file when it is the last function).
  const bodyRange = (fnRe) => {
    const start = lines.findIndex((l) => fnRe.test(l));
    if (start === -1) return null;
    let end = lines.length - 1;
    for (let i = start + 1; i < lines.length; i++) {
      if (FN_OPEN_RE.test(lines[i])) { end = i - 1; break; }
    }
    return [start, end];
  };
  const callsGuardIn = ([start, end]) => {
    for (let i = start; i <= end; i++) {
      if (isComment(lines[i])) continue;
      if (GUARD_DEF_RE.test(lines[i])) continue; // the definition, not a call
      if (GUARD_TOKEN_RE.test(lines[i])) return true; // a live call/use
    }
    return false;
  };
  for (const [fn, fnRe] of [
    ["classify_command", /^classify_command\s*\(\s*\)\s*\{/],
    ["classify_path", /^classify_path\s*\(\s*\)\s*\{/],
  ]) {
    const range = bodyRange(fnRe);
    if (!range) { violation("7", `${relPath}: ${fn}() body not found — cannot verify ${GUARD} is wired into it (fail-closed)`); ok = false; continue; }
    if (!callsGuardIn(range)) { violation("7", `${relPath}: ${fn}() does not call ${GUARD} — both public emitters must front the runtime-sourcing guard (codex R1-P1; fail-closed)`); ok = false; }
  }

  // (c) emit-site label literals ⊆ taxonomy (F6 over emit sites). Scoped to the
  // taxonomy-label emitters: the `*preflight*` functions emit Layer-D
  // claim-classes (`codex-review-handoff`, `none`, …) via the SAME printf shape,
  // so they are excluded by tracking the current column-0 function. The final
  // reducer emit uses `"$final_label"` (a variable) which the literal-only regex
  // skips by construction.
  // Label-emit shape only: format begins with `%s\t` (label<TAB>…), matching the
  // classifier's `'%s\t%s\t%s\n'` / `'%s\t\t%s\n'` emits. A bare `printf '%s'
  // "word"` (no tab) is NOT a label emit and is excluded by construction.
  const EMIT_RE = /\bprintf\s+'%s\\t[^']*'\s+"([a-z_][a-z_]*)"/g;
  let curFn = "";
  for (let i = 0; i < lines.length; i++) {
    if (isComment(lines[i])) continue;
    const fm = FN_OPEN_RE.exec(lines[i]);
    if (fm) curFn = fm[1];
    if (/preflight/.test(curFn)) continue; // claim-class vocabulary, not taxonomy labels
    // matchAll works on a clone of the regex, so the shared /g literal's
    // lastIndex never leaks from one line into the next.
    for (const m of lines[i].matchAll(EMIT_RE)) {
      if (!labelIds.has(m[1])) {
        violation("7", `${relPath}: emit-site label literal ${JSON.stringify(m[1])} (line ${startLineOf[i]}) is not a taxonomy label (F6 emit-site vocabulary closure)`);
        ok = false;
      }
    }
  }
  return ok;
}

export function validateBpContract({ projectRoot, taxonomyPath = null, eventsPath = null, bpDirPath = null } = {}) {
const root = resolveProjectRoot(projectRoot, process.cwd());
const violations = [];
Expand Down Expand Up @@ -441,13 +547,16 @@ export function validateBpContract({ projectRoot, taxonomyPath = null, eventsPat
try { clsReal = fs.realpathSync(clsLex); }
catch { violation("7", `${rel}: default classifier script missing — R4 requires the default classifier; arm closure cannot be verified (fail-closed)`); continue; }
if (!contained(clsReal, root)) { violation("7", `${rel}: classifier resolves outside the project root — refusing to read`); continue; }
const arms = extractPriorityArms(fs.readFileSync(clsReal, "utf8"), rel, (d) => violation("7", d));
const clsSrc = fs.readFileSync(clsReal, "utf8");
const arms = extractPriorityArms(clsSrc, rel, (d) => violation("7", d));
if (arms === null) continue;
classifiersParsed++;
const armSet = new Set(arms);
if (new Set(arms).size !== arms.length) violation("7", `${rel}: duplicate _priority case arm(s)`);
for (const a of armSet) if (!labelIds.has(a)) violation("7", `${rel}: _priority arm ${JSON.stringify(a)} is not a taxonomy label (extra arm)`);
for (const id of labelIds) if (!armSet.has(id)) violation("7", `${rel}: taxonomy label ${JSON.stringify(id)} has NO _priority arm — it would rank priority 0, below read_only, a silent downgrade in most-restrictive-wins reduction (L473)`);
// Assertion 7c — default classifier runtime-sources taxonomy.json (P3c, F4/F6).
checkTaxonomySourcing(clsSrc, rel, labelIds, violation);
}
checks++;
if (manifests.some((m) => m.manifest.classifier && m.manifest.classifier.mode === "default") && classifiersParsed === 0) {
Expand Down
Loading
Loading