diff --git a/.github/workflows/harness.yml b/.github/workflows/harness.yml index 81cd9cc..f445f7d 100644 --- a/.github/workflows/harness.yml +++ b/.github/workflows/harness.yml @@ -39,8 +39,8 @@ jobs: - name: Run tests run: npm test - - name: Run chat-demo (smoke test) - run: npx tsx examples/chat-demo.ts "What is 2+2?" + - name: Run chat pipeline demo (smoke test) + run: npx tsx examples/chat-pipeline-demo.ts "What is 2+2?" - - name: Run coder-demo (smoke test) - run: npx tsx examples/coder-demo.ts "Add error handling to the code" + - name: Run coder pipeline demo (smoke test) + run: npx tsx examples/coder-pipeline-demo.ts "Add error handling to the code" diff --git a/CHANGELOG.md b/CHANGELOG.md index fef98e8..ca3787a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,6 @@ The project follows semantic versioning for schema and registry compatibility: ### Changed - Breaking temporal normalization across governance RFCs: canonical fields (`observed_at`, `decided_at`, `effective_at`, `expires_at`, `started_at`, `completed_at`, `superseded_at`) replace legacy aliases. -- Governance spine schemas updated (policy, permissions, delegation, audit, receipts, lifecycle, telemetry, memory, multi-agent protocol) and registry regenerated. +- Governance spine schemas updated (policy, permissions, delegation, audit, receipts, lifecycle, telemetry, memory, multi-party protocol) and registry regenerated. - Reference harness runtime/types aligned to canonical temporal fields, logical ordering metadata, and updated governance artifacts. - Example fixtures now use registry shortname folders for delegation, permissions, and execution/audit receipts. diff --git a/README.md b/README.md index 0883130..d4160b5 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ This makes Open CoT useful beyond any one framework. 
An implementation can use R |------|------| | [`rfcs/`](./rfcs/) | **53 RFCs** covering reasoning traces, tool invocation, governed execution, policy, delegation, receipts, capability manifests, cognitive artifacts, and reconciliation results | | [`schemas/`](./schemas/) | Versioned JSON Schemas per RFC, including `registry.json` | -| [`harness/`](./harness/) | Reference TypeScript harness that exercises earlier governed execution RFCs | +| [`harness/`](./harness/) | Reference TypeScript core package that exercises earlier governed execution RFCs | | [`examples/`](./examples/) | Validated instance fixtures keyed by registry shortname | | [`reference/python/`](./reference/python/) | Reference Python tooling | | [`tools/`](./tools/) | Schema and fixture validation, registry sync, and RFC helpers | @@ -89,7 +89,7 @@ pip install -r requirements-tools.txt python tools/validate.py ``` -Run the reference harness: +Run the reference package: ```bash cd harness && npm install && npm test @@ -105,7 +105,7 @@ That implementation pressure-tests Open CoT. If Open Lagrange needs a portable s - **53 RFCs** and a versioned JSON Schema registry. - New draft schemas for cognitive artifacts and reconciliation results. -- Reference harness coverage for governed execution, policy, delegation, receipts, budgets, and capability manifests. +- Reference package coverage for governed execution, policy, delegation, receipts, budgets, and capability manifests. - Cross-language validation tooling for schemas and examples. - Experiment cards and local runbooks under [`docs/experiments/`](./docs/experiments/). 
diff --git a/datasets/synthetic/generate_scaled.py b/datasets/synthetic/generate_scaled.py index bbd6af8..91c3ad2 100644 --- a/datasets/synthetic/generate_scaled.py +++ b/datasets/synthetic/generate_scaled.py @@ -35,7 +35,7 @@ def build_scaled_traces() -> list[dict[str, object]]: "benchmark", "validator", "trace", - "agent", + "pipeline", "memory", "policy", "budget", diff --git a/datasets/synthetic/task_bank_v1_large.jsonl b/datasets/synthetic/task_bank_v1_large.jsonl index 8302afc..38c6031 100644 --- a/datasets/synthetic/task_bank_v1_large.jsonl +++ b/datasets/synthetic/task_bank_v1_large.jsonl @@ -24,7 +24,7 @@ {"version": "0.1", "task": "Reverse the string 'benchmark'.", "steps": [{"id": "s1", "type": "thought", "content": "Read characters from right to left."}, {"id": "s2", "type": "code", "content": "'benchmark'[::-1] -> 'kramhcneb'", "parent": "s1"}], "final_answer": "kramhcneb"} {"version": "0.1", "task": "Reverse the string 'validator'.", "steps": [{"id": "s1", "type": "thought", "content": "Read characters from right to left."}, {"id": "s2", "type": "code", "content": "'validator'[::-1] -> 'rotadilav'", "parent": "s1"}], "final_answer": "rotadilav"} {"version": "0.1", "task": "Reverse the string 'trace'.", "steps": [{"id": "s1", "type": "thought", "content": "Read characters from right to left."}, {"id": "s2", "type": "code", "content": "'trace'[::-1] -> 'ecart'", "parent": "s1"}], "final_answer": "ecart"} -{"version": "0.1", "task": "Reverse the string 'agent'.", "steps": [{"id": "s1", "type": "thought", "content": "Read characters from right to left."}, {"id": "s2", "type": "code", "content": "'agent'[::-1] -> 'tnega'", "parent": "s1"}], "final_answer": "tnega"} +{"version": "0.1", "task": "Reverse the string 'pipeline'.", "steps": [{"id": "s1", "type": "thought", "content": "Read characters from right to left."}, {"id": "s2", "type": "code", "content": "'pipeline'[::-1] -> 'enilepip'", "parent": "s1"}], "final_answer": "enilepip"} 
{"version": "0.1", "task": "Reverse the string 'memory'.", "steps": [{"id": "s1", "type": "thought", "content": "Read characters from right to left."}, {"id": "s2", "type": "code", "content": "'memory'[::-1] -> 'yromem'", "parent": "s1"}], "final_answer": "yromem"} {"version": "0.1", "task": "Reverse the string 'policy'.", "steps": [{"id": "s1", "type": "thought", "content": "Read characters from right to left."}, {"id": "s2", "type": "code", "content": "'policy'[::-1] -> 'ycilop'", "parent": "s1"}], "final_answer": "ycilop"} {"version": "0.1", "task": "Reverse the string 'budget'.", "steps": [{"id": "s1", "type": "thought", "content": "Read characters from right to left."}, {"id": "s2", "type": "code", "content": "'budget'[::-1] -> 'tegdub'", "parent": "s1"}], "final_answer": "tegdub"} diff --git a/docs/bibliography.md b/docs/bibliography.md index ba97809..b077516 100644 --- a/docs/bibliography.md +++ b/docs/bibliography.md @@ -1,4 +1,4 @@ -# 📚 Annotated Bibliography: Chain‑of‑Thought & LLM Reasoning +# 📚 Annotated Bibliography: Chain‑of‑Thought & LLM Reasoning *With direct arXiv PDF links where available.* A curated bibliography covering foundational, structured, search‑based, RL‑based, and mechanistic reasoning research for LLMs. All arXiv‑hosted papers include stable PDF links. @@ -7,109 +7,109 @@ A curated bibliography covering foundational, structured, search‑based, RL‑b ## 1. Foundational Chain‑of‑Thought (CoT) -### Wei et al. (2022). *Chain‑of‑Thought Prompting Elicits Reasoning in Large Language Models.* -https://arxiv.org/pdf/2201.11903.pdf -Introduces CoT prompting and demonstrates large gains in arithmetic, symbolic, and commonsense reasoning. +### Wei et al. (2022). *Chain‑of‑Thought Prompting Elicits Reasoning in Large Language Models.* +https://arxiv.org/pdf/2201.11903.pdf +Introduces CoT prompting and demonstrates large gains in arithmetic, symbolic, and commonsense reasoning. 
**Relevance:** Defines the modern concept of “reasoning traces.” -### Wang et al. (2022). *Self‑Consistency Improves Chain‑of‑Thought Reasoning in LLMs.* -https://arxiv.org/pdf/2203.11171.pdf -Proposes sampling multiple CoTs and voting for the most consistent answer. +### Wang et al. (2022). *Self‑Consistency Improves Chain‑of‑Thought Reasoning in LLMs.* +https://arxiv.org/pdf/2203.11171.pdf +Proposes sampling multiple CoTs and voting for the most consistent answer. **Relevance:** Establishes statistical evaluation of reasoning. -### Zhou et al. (2022). *Least‑to‑Most Prompting.* -https://arxiv.org/pdf/2205.10625.pdf -Breaks complex tasks into simpler subproblems. +### Zhou et al. (2022). *Least‑to‑Most Prompting.* +https://arxiv.org/pdf/2205.10625.pdf +Breaks complex tasks into simpler subproblems. **Relevance:** Motivates structured decomposition fields in reasoning schemas. --- ## 2. Structured Reasoning & Agentic CoT -### Yao et al. (2022). *ReAct: Synergizing Reasoning and Acting in Language Models.* -https://arxiv.org/pdf/2210.03629.pdf -Combines reasoning (“Thought”) with tool actions (“Act”). -**Relevance:** Foundation of modern agent loops. +### Yao et al. (2022). *ReAct: Synergizing Reasoning and Acting in Language Models.* +https://arxiv.org/pdf/2210.03629.pdf +Combines reasoning (“Thought”) with tool actions (“Act”). +**Relevance:** Foundation of modern cognitive pipelines. -### Shinn et al. (2023). *Reflexion: Language Agents with Verbal Reinforcement Learning.* -https://arxiv.org/pdf/2303.11366.pdf -Introduces self‑critique and iterative refinement loops. +### Shinn et al. (2023). *Reflexion: Language Agents with Verbal Reinforcement Learning.* +https://arxiv.org/pdf/2303.11366.pdf +Introduces self‑critique and iterative refinement loops. **Relevance:** Motivates `critique` and `revision` fields in schemas. -### Chen et al. (2022). *Program‑of‑Thoughts (PoT).* -https://arxiv.org/pdf/2211.12588.pdf -Uses executable code as reasoning traces. 
+### Chen et al. (2022). *Program‑of‑Thoughts (PoT).* +https://arxiv.org/pdf/2211.12588.pdf +Uses executable code as reasoning traces. **Relevance:** Demonstrates typed, verifiable reasoning. --- ## 3. Search‑Based Reasoning (Beyond Linear CoT) -### Yao et al. (2023). *Tree‑of‑Thoughts: Deliberate Problem Solving with Large Language Models.* -https://arxiv.org/pdf/2305.10601.pdf -Generalizes CoT into a search tree with branching and pruning. +### Yao et al. (2023). *Tree‑of‑Thoughts: Deliberate Problem Solving with Large Language Models.* +https://arxiv.org/pdf/2305.10601.pdf +Generalizes CoT into a search tree with branching and pruning. **Relevance:** Motivates branching reasoning structures. -### Besta et al. (2023). *Graph‑of‑Thoughts: Solving Problems with Large Language Models and Search.* -https://arxiv.org/pdf/2308.09687.pdf -Extends ToT into graph‑structured reasoning. +### Besta et al. (2023). *Graph‑of‑Thoughts: Solving Problems with Large Language Models and Search.* +https://arxiv.org/pdf/2308.09687.pdf +Extends ToT into graph‑structured reasoning. **Relevance:** Encourages flexible graph‑based schemas. -### Long‑Horizon CoT Studies -(Various works; no single canonical arXiv source.) -Show that longer reasoning traces improve performance but increase instability. +### Long‑Horizon CoT Studies +(Various works; no single canonical arXiv source.) +Show that longer reasoning traces improve performance but increase instability. **Relevance:** Motivates metadata like `confidence`, `verification_status`, and `error_type`. --- ## 4. RL‑Based Reasoning (R1‑Style, DeepSeek‑Style, Qwen‑Style) -### DeepSeek‑R1 (2024). *DeepSeek‑R1: Incentivizing Reasoning in LLMs via Reinforcement Learning.* -https://arxiv.org/pdf/2501.12948.pdf -Uses RL with verifiable rewards to produce long, structured reasoning. +### DeepSeek‑R1 (2024). 
*DeepSeek‑R1: Incentivizing Reasoning in LLMs via Reinforcement Learning.* +https://arxiv.org/pdf/2501.12948.pdf +Uses RL with verifiable rewards to produce long, structured reasoning. **Relevance:** Aligns with structured scratchpad formats. -### Qwen2.5‑R1 (2024). *Reinforcement Learning for Reasoning.* -https://arxiv.org/pdf/2501.19393.pdf -Documents RL-centric post-training strategies that improve reasoning quality while preserving broad instruction utility. +### Qwen2.5‑R1 (2024). *Reinforcement Learning for Reasoning.* +https://arxiv.org/pdf/2501.19393.pdf +Documents RL-centric post-training strategies that improve reasoning quality while preserving broad instruction utility. **Relevance:** Supports reward-aware post-training pipelines and reproducibility-oriented run metadata. --- ## 5. Evaluation, Reliability, and Calibration -### Lin et al. (2021). *TruthfulQA: Measuring How Models Mimic Human Falsehoods.* -https://arxiv.org/pdf/2109.07958.pdf -Introduces reliability-oriented evaluation emphasizing truthful behavior under difficult prompts. +### Lin et al. (2021). *TruthfulQA: Measuring How Models Mimic Human Falsehoods.* +https://arxiv.org/pdf/2109.07958.pdf +Introduces reliability-oriented evaluation emphasizing truthful behavior under difficult prompts. **Relevance:** Motivates safety-aware benchmark slices and failure-mode tracking. -### Kadavath et al. (2022). *Language Models (Mostly) Know What They Know.* -https://arxiv.org/pdf/2207.05221.pdf -Studies calibration and confidence quality in language models. +### Kadavath et al. (2022). *Language Models (Mostly) Know What They Know.* +https://arxiv.org/pdf/2207.05221.pdf +Studies calibration and confidence quality in language models. **Relevance:** Motivates confidence and uncertainty metrics in verifier outputs. -### Gao et al. (2023). *Pal: Program-Aided Language Models.* -https://arxiv.org/pdf/2211.10435.pdf -Uses executable programs to verify intermediate reasoning steps. +### Gao et al. (2023). 
*Pal: Program-Aided Language Models.* +https://arxiv.org/pdf/2211.10435.pdf +Uses executable programs to verify intermediate reasoning steps. **Relevance:** Supports stronger step-level verification beyond format checks. --- ## 6. Open-Source Tooling and Reuse Guidance -### EleutherAI LM Evaluation Harness -https://github.com/EleutherAI/lm-evaluation-harness -De facto open benchmark runner for reproducible LLM evaluation. +### EleutherAI LM Evaluation Harness +https://github.com/EleutherAI/lm-evaluation-harness +De facto open benchmark runner for reproducible LLM evaluation. **Relevance:** Should be integrated through adapters rather than reimplemented. -### Hugging Face TRL -https://github.com/huggingface/trl -Open-source stack for SFT, DPO, PPO/GRPO-style fine-tuning workflows. +### Hugging Face TRL +https://github.com/huggingface/trl +Open-source stack for SFT, DPO, PPO/GRPO-style fine-tuning workflows. **Relevance:** Preferred training primitive for alignment and preference experiments. -### vLLM -https://github.com/vllm-project/vllm -High-throughput inference engine with consistent generation behavior for evaluation and serving. +### vLLM +https://github.com/vllm-project/vllm +High-throughput inference engine with consistent generation behavior for evaluation and serving. **Relevance:** Stabilizes benchmark throughput and reproducibility for large eval runs. --- @@ -142,9 +142,9 @@ Reports 56% token reduction vs JSON with native relationship support, type safet Use this checklist when building, fine-tuning, and validating models with Open CoT: -1. **Always emit structured traces** (`version`, `task`, `steps`, `final_answer`) and validate before scoring. -2. **Use multi-sample evaluation** with consistency metrics, not only single greedy outputs. -3. **Track lineage metadata** (dataset hash, model base, adapter hash, seed, decoding config) for every run. -4. **Enforce data governance gates** (license allowlist, dedup, contamination checks, provenance fields). 
-5. **Run policy and safety checks** (budget limits, tool restrictions, redaction/audit events) in runtime scripts. +1. **Always emit structured traces** (`version`, `task`, `steps`, `final_answer`) and validate before scoring. +2. **Use multi-sample evaluation** with consistency metrics, not only single greedy outputs. +3. **Track lineage metadata** (dataset hash, model base, adapter hash, seed, decoding config) for every run. +4. **Enforce data governance gates** (license allowlist, dedup, contamination checks, provenance fields). +5. **Run policy and safety checks** (budget limits, tool restrictions, redaction/audit events) in runtime scripts. 6. **Reuse mature OSS tooling** for training/evaluation kernels and keep Open CoT logic focused on schemas/adapters/conformance. \ No newline at end of file diff --git a/docs/cognitive-participation-pivot.md b/docs/cognitive-participation-pivot.md new file mode 100644 index 0000000..573f3c7 --- /dev/null +++ b/docs/cognitive-participation-pivot.md @@ -0,0 +1,54 @@ +# Cognitive Participation Pivot + +Open CoT defines a portable interface between cognition and execution. The +model contributes fuzzy text processing and structured cognitive artifacts; the +runtime boundary validates, authorizes, executes, records observations, and +reconciles final state. + +This distinction matters because natural-language reasoning is useful evidence, +but it is not authority. A reasoning trace can explain how a model reached a +proposal. It cannot grant permission, prove correctness, or bypass policy. 
+ +| Common market framing | Open CoT framing | +| --- | --- | +| A model owns the loop | A runtime boundary owns reconciliation | +| Tool use is part of the model experience | Endpoint execution is a governed side effect | +| Prompts carry safety expectations | Capability snapshots and policy gates carry authority | +| Reasoning explains the whole run | Reasoning is cognitive evidence inside a larger audit record | +| A failed tool call is explained by natural language | A failed endpoint execution is recorded as a structured observation and error | +| Safety is mostly instruction-following | Safety is layered validation, permission, budget, and result reconciliation | +| Interfaces are private runtime details | Interfaces are portable schemas that independent runtimes can implement | + +## Reasoning Remains Central + +Open CoT keeps reasoning traces because they are evidence of cognitive +participation. They help answer: + +- What objective did the cognitive step believe it was handling? +- What constraints and assumptions shaped the proposal? +- What uncertainty was present before execution? +- What explanation can be shared safely with reviewers? +- What detailed evidence, if any, must remain restricted or redacted? + +The trace is intentionally separated from execution authority. A runtime may use +reasoning evidence during review, auditing, debugging, or evaluation, but it +must reconcile execution intents against capability snapshots, policy gates, +budgets, preconditions, and endpoint results. + +## Interface Boundary + +Open CoT should standardize portable artifacts: + +- Cognitive artifacts. +- Capability snapshots. +- Execution intents. +- Reasoning evidence. +- Observations. +- Policy evaluation records. +- Reconciliation results. +- Error taxonomy. +- Budget and cost boundaries. 
+ +Open Lagrange and other implementations can then choose their own durable +runtime, transport, endpoint registry, policy engine, and storage model while +still sharing the same interface contract. diff --git a/docs/eli5_guide.md b/docs/eli5_guide.md index 5b89d18..18136cb 100644 --- a/docs/eli5_guide.md +++ b/docs/eli5_guide.md @@ -8,7 +8,7 @@ If you are not an LLM expert, this is for you. ## 1) What is this project? -Every AI agent framework today handles permissions, safety, and logging differently. If you switch models or tools, you start from scratch. There is no shared playbook. +Every AI cognitive pipeline framework today handles permissions, safety, and logging differently. If you switch models or tools, you start from scratch. There is no shared playbook. **Open CoT** is that shared playbook. It gives you: @@ -17,26 +17,26 @@ Every AI agent framework today handles permissions, safety, and logging differen - **typed receipts** that prove what actually happened - and it works the same whether you use GPT, Claude, Llama, Qwen, or anything else -Think of it like HTTP for agent behavior. HTTP does not care what web server you run -- it defines the contract. Open CoT does not care what model you run -- it defines the control contract. +Think of it like HTTP for cognitive pipeline behavior. HTTP does not care what web server you run -- it defines the contract. Open CoT does not care what model you run -- it defines the control contract. --- ## 2) The big idea in one sentence -> Models **propose** actions. The harness **decides** what is allowed. Receipts **prove** what happened. +> Models **propose** actions. The core package **evaluates** what is allowed. Receipts **prove** what happened. -That is the whole thing. The model never gets to authorize itself. It asks. The harness evaluates the ask against policies. If approved, the tool runs. Everything is logged. +That is the whole thing. The model never gets to authorize itself. It asks. 
The core package evaluates the ask against policies. If approved, the tool runs. Everything is logged. --- ## 3) Why should I care? -If you are building with AI agents, you have probably run into: +If you are building with AI pipelines, you have probably run into: -- **"How do I stop my agent from doing something dangerous?"** +- **"How do I stop my cognitive pipeline from doing something dangerous?"** Open CoT makes the model ask permission before every tool call. No permission, no execution. -- **"How do I know what my agent actually did?"** +- **"How do I know what my cognitive pipeline actually did?"** Every tool call produces a tamper-evident receipt. At the end, the whole run is sealed in an audit envelope with integrity hashes. - **"I switched from one model to another and everything broke."** @@ -47,14 +47,14 @@ If you are building with AI agents, you have probably run into: --- -## 4) The harness: what it is and how to think about it +## 4) The core package: what it is and how to think about it -The harness is the **runtime** that sits between the model and the real world. It is a TypeScript implementation of the governed execution model defined in the RFCs. +The core package is the **runtime** that sits between the model and the real world. It is a TypeScript implementation of the governed execution model defined in the RFCs. Here is the flow: ``` -You give the agent a task +You give the cognitive pipeline a task -> receive: harness logs the task -> frame: model interprets what needs doing -> plan: model proposes actions (maybe tool calls) @@ -70,11 +70,11 @@ You give the agent a task The model never skips the permission step. If a tool is not in the policy, it does not run. -Before the model even starts planning, the harness gives it a **capability manifest** -- a short briefing that says "here are the tools you can use, here are the ones you can't, and here is your remaining budget." 
This prevents the model from wasting time asking for things it will never get. The manifest uses a **heartbeat** pattern: it re-compiles and re-injects before **every** call to the model, not just at the start. Why? Because models forget. As the conversation grows, information from earlier turns fades -- the model "loses sight" of what it was told 5 turns ago. The heartbeat keeps the truth fresh and prevents the model from hallucinating tool calls against stale or forgotten context. It costs under 200 tokens per injection, which is far less than the tokens wasted when a model guesses wrong and triggers a denial cycle. -For simple use cases (like a chatbot doing a pre-approved search), the harness has a shortcut: `plan -> execute_tool` skips the delegation ceremony for tools that are already on the allowlist. +For simple use cases (like a chatbot doing a pre-approved search), the core package has a shortcut: `plan -> execute_tool` skips the delegation ceremony for tools that are already on the allowlist. --- @@ -89,43 +89,43 @@ cd harness npm install ``` -### Run the chat agent +### Run the chat cognitive pipeline ```bash -npx tsx examples/chat-demo.ts +npx tsx examples/chat-pipeline-demo.ts ``` -This runs a simple agent loop: receive, frame, plan, execute a search tool, observe, finalize. Look at the output -- you will see the step-by-step trace and a validation check at the end. +This runs a simple cognitive pipeline loop: receive, frame, plan, execute a search tool, observe, finalize. Look at the output -- you will see the step-by-step trace and a validation check at the end. 
-### Run the coder agent +### Run the coder cognitive pipeline ```bash -npx tsx examples/coder-demo.ts +npx tsx examples/coder-pipeline-demo.ts ``` This one is more complex. It reads a file, makes changes, writes the file, runs tests, and verifies. Watch the FSM transitions in the output -- you will see it loop through plan/execute/observe/critique. -### Run the governed agent (the new stuff) +### Run the governed cognitive pipeline (the new stuff) This is the flagship demo. It shows the full permission-aware flow. -**Allow mode** -- the agent asks to use the search tool, and the policy says yes: +**Allow mode** -- the cognitive pipeline asks to use the search tool, and the policy says yes: ```bash -npx tsx examples/governed-demo.ts +npx tsx examples/governed-pipeline-demo.ts ``` Look at the output. You will see: -- `request_authority` -- the agent formally requesting search access +- `request_authority` -- the cognitive pipeline formally requesting search access - `validate_authority` -- the policy evaluating the request - `delegate_narrow` -- authority granted - A governance summary showing 1 request, 1 approval - An audit envelope with integrity hashes -**Deny mode** -- same agent, but now the policy blocks search: +**Deny mode** -- same cognitive pipeline, but now the policy blocks search: ```bash -npx tsx examples/governed-demo.ts --deny "search for open source" +npx tsx examples/governed-pipeline-demo.ts --deny "search for open source" ``` Watch what happens: the flow goes `plan -> request_authority -> validate_authority -> deny -> audit_seal`. The tool never runs. The audit envelope records the denial with the reason. 
@@ -133,9 +133,9 @@ Watch what happens: the flow goes `plan -> request_authority -> validate_authori **Try your own questions:** ```bash -npx tsx examples/governed-demo.ts "calculate 2+2" -npx tsx examples/governed-demo.ts "search for the speed of light" -npx tsx examples/governed-demo.ts --deny "search for anything" +npx tsx examples/governed-pipeline-demo.ts "calculate 2+2" +npx tsx examples/governed-pipeline-demo.ts "search for the speed of light" +npx tsx examples/governed-pipeline-demo.ts --deny "search for anything" ``` ### See the full trace or envelope @@ -143,8 +143,8 @@ npx tsx examples/governed-demo.ts --deny "search for anything" Add `--trace` to dump the reasoning trace as JSON, or `--envelope` to see the complete audit envelope: ```bash -npx tsx examples/governed-demo.ts --envelope -npx tsx examples/governed-demo.ts --trace +npx tsx examples/governed-pipeline-demo.ts --envelope +npx tsx examples/governed-pipeline-demo.ts --trace ``` --- @@ -162,7 +162,7 @@ You should see 78 tests pass across 8 test files. The governance tests cover: - Policy evaluation: allow, deny, narrow, fail-closed defaults - Authority receipts with integrity hashing - Audit event chaining and envelope sealing -- End-to-end governed agent runs +- End-to-end governed cognitive pipeline runs --- @@ -186,7 +186,7 @@ Policies are just JSON objects. Here is the simplest possible policy that blocks } ``` -You can also **narrow** instead of deny. This policy lets the agent read email, but only headers: +You can also **narrow** instead of deny. This policy lets the cognitive pipeline read email, but only headers: ```json { @@ -254,13 +254,13 @@ Today, if you use Claude for coding and GPT for analysis and Llama for search, e With Open CoT, the contract is the same regardless of which model sits behind it: -1. **Any model** that can produce structured output can participate. The harness does not care if it came from Claude, GPT, Llama, Qwen, Mistral, or a local fine-tune. +1. 
**Any model** that can produce structured output can participate. The core package does not care if it came from Claude, GPT, Llama, Qwen, Mistral, or a local fine-tune. 2. **Same policies** apply to every model. You write your safety rules once. They apply whether the backend is a $200/month API or a 7B model running on your laptop. 3. **Same audit trail** for every run. Your compliance team does not need a different integration per vendor. -4. **Model adapters** are thin. The mock backend in this repo is 90 lines. A real OpenAI adapter is about the same. The adapter just translates model-specific output into the Open CoT schema -- the harness handles everything else. +4. **Model adapters** are thin. The mock backend in this repo is 90 lines. A real OpenAI adapter is about the same. The adapter just translates model-specific output into the Open CoT schema -- the core package handles everything else. This means you can swap models freely without rewriting your safety, permission, or audit logic. The control plane stays stable. @@ -271,10 +271,10 @@ This means you can swap models freely without rewriting your safety, permission, **"Read email headers but not bodies"** Policy narrows email access to subject/from/date only. The tool gets a permission with `allowed_fields: ["subject", "from", "date"]`. Even if the model asks for the body, it will not get it. -**"The agent tried to access a file it should not have"** +**"The cognitive pipeline tried to access a file it should not have"** The delegation request gets evaluated against policy. No matching allow rule = deny. The denial is recorded in the audit envelope. The file is never touched. -**"We need proof the agent only did what it was supposed to"** +**"We need proof the cognitive pipeline only did what it was supposed to"** The audit envelope contains: every delegation request, every decision, every tool execution receipt with input/output hashes, and an integrity hash over the whole thing. 
If any event was modified after the fact, the hash chain breaks. **"A tool call succeeded but returned suspicious output"** @@ -290,7 +290,7 @@ The postcondition check on the execution receipt catches it. Status is set to "q | `schemas/` | JSON Schemas generated from the RFCs. Machine-readable versions of the specs. | | `harness/` | TypeScript reference implementation. The actual running code. | | `harness/src/governance/` | Permission manager, policy evaluator, auth broker, audit engine. | -| `harness/src/agents/` | Three agent implementations: chat, coder, governed. | +| `harness/src/pipelines/` | Three cognitive pipeline implementations: chat, coder, governed. | | `harness/examples/` | Runnable demos you can try right now. | | `examples/` | JSON fixture files showing what the data looks like. | | `tools/` | Python scripts for validation and schema sync. | @@ -302,9 +302,9 @@ The postcondition check on the execution receipt catches it. Status is set to "q | Script | What it does | |--------|--------------| -| `npx tsx examples/chat-demo.ts` | Run the chat agent (simple mode) | -| `npx tsx examples/coder-demo.ts` | Run the coder agent (plan/execute/repair loop) | -| `npx tsx examples/governed-demo.ts` | Run the governed agent (full delegation flow) | +| `npx tsx examples/chat-pipeline-demo.ts` | Run the chat cognitive pipeline (simple mode) | +| `npx tsx examples/coder-pipeline-demo.ts` | Run the coder cognitive pipeline (plan/execute/repair loop) | +| `npx tsx examples/governed-pipeline-demo.ts` | Run the governed cognitive pipeline (full delegation flow) | | `npm test` (in harness/) | Run all 78 tests | | `python tools/validate.py` | Validate schemas and examples | | `python tools/sync_schemas_from_rfcs.py` | Regenerate JSON Schemas from RFC markdown | @@ -315,10 +315,10 @@ The postcondition check on the execution receipt catches it. Status is set to "q 1. Read this guide. 2. Run `cd harness && npm install && npm test` -- make sure everything passes. -3. 
Run `npx tsx examples/governed-demo.ts` -- watch the full flow. +3. Run `npx tsx examples/governed-pipeline-demo.ts` -- watch the full flow. 4. Run it again with `--deny` -- see the policy block the tool. 5. Run it with `--envelope` -- look at the audit envelope JSON. -6. Open `rfcs/0007-agent-loop-protocol.md` -- read about the FSM states. +6. Open `rfcs/0007-cognitive-pipeline-protocol.md` -- read about the FSM states. 7. Open `rfcs/0047-delegation-extension.md` -- see how delegation works. 8. Look at `examples/delegation/example1.json` -- see the data. 9. Look at `harness/src/governance/policy-evaluator.ts` -- see how policy rules are evaluated. @@ -337,27 +337,27 @@ No. Everything works with the mock backend out of the box. Yes. Set `OPENAI_BASE_URL=http://localhost:11434/v1` and the demos use your local model instead of the mock. ### "What if I break something?" -That is fine. The harness is designed to fail early with clear errors. Run `npm test` to check if things still work. +That is fine. The core package is designed to fail early with clear errors. Run `npm test` to check if things still work. ### "Is this only for TypeScript?" -The TypeScript harness is the reference implementation. The schemas are JSON Schema (language-agnostic). You could implement the same FSM and governance in Python, Go, Rust, or anything else. +The TypeScript core package is the reference implementation. The schemas are JSON Schema (language-agnostic). You could implement the same FSM and governance in Python, Go, Rust, or anything else. -### "What is the difference between the chat agent and the governed agent?" -The chat agent uses the pre-authorized shortcut -- it skips the delegation ceremony for tools on the allowlist. The governed agent goes through the full flow: request, evaluate, narrow, grant, execute, receipt. +### "What is the difference between the chat cognitive pipeline and the governed cognitive pipeline?" 
+The chat cognitive pipeline uses the pre-authorized shortcut -- it skips the delegation ceremony for tools on the allowlist. The governed cognitive pipeline goes through the full flow: request, evaluate, narrow, grant, execute, receipt. ### "Why does the default policy deny everything?" -Fail-closed. If you forget to write a policy rule, the agent cannot do anything. That is safer than the alternative. +Fail-closed. If you forget to write a policy rule, the cognitive pipeline cannot do anything. That is safer than the alternative. --- ## 15) Bottom line -Open CoT gives agents a standard way to ask for permission and gives you a standard way to say yes, no, or "yes but only this much." +Open CoT gives pipelines a standard way to ask for permission and gives you a standard way to say yes, no, or "yes but only this much." It works the same across models, produces tamper-evident audit trails, and fails safely when something is wrong. You do not need to be an expert to start. You need: - a terminal -- `npm install` in the harness folder +- `npm install` in the core package folder - and 5 minutes to run the demos diff --git a/docs/experiments/encrypted_reasoning_observability.md b/docs/experiments/encrypted_reasoning_observability.md index 2ebaf59..9e0332f 100644 --- a/docs/experiments/encrypted_reasoning_observability.md +++ b/docs/experiments/encrypted_reasoning_observability.md @@ -8,7 +8,7 @@ If models hide internal reasoning, enforcing RFC0001 structured traces with veri - `schemas/rfc-0001-reasoning.json` - `schemas/rfc-0002-verifier.json` -- `schemas/rfc-0007-agent-loop.json` +- `schemas/rfc-0007-cognitive-pipeline.json` ## Run command diff --git a/docs/experiments/runaway_loop_detection.md b/docs/experiments/runaway_loop_detection.md index da253e6..f79ee37 100644 --- a/docs/experiments/runaway_loop_detection.md +++ b/docs/experiments/runaway_loop_detection.md @@ -6,8 +6,8 @@ Loop-level guardrails can prevent infinite/redundant reasoning traces without co ## 
Required RFC/schema artifacts -- `schemas/rfc-0007-agent-loop.json` -- `schemas/rfc-0017-agent-safety-sandboxing.json` +- `schemas/rfc-0007-cognitive-pipeline.json` +- `schemas/rfc-0017-cognitive-pipeline-safety-sandboxing.json` - `schemas/rfc-0038-cost-aware-reasoning-budget.json` ## Run command diff --git a/docs/experiments/token_budget_enforcement.md b/docs/experiments/token_budget_enforcement.md index b2131d9..080fb94 100644 --- a/docs/experiments/token_budget_enforcement.md +++ b/docs/experiments/token_budget_enforcement.md @@ -8,7 +8,7 @@ Explicit token/step budgets improve predictability and cost control while mainta - `schemas/rfc-0038-cost-aware-reasoning-budget.json` - `schemas/rfc-0039-tool-cost-modeling.json` -- `schemas/rfc-0040-multi-agent-economic-incentives.json` +- `schemas/rfc-0040-multi-party-economic-incentives.json` ## Run command diff --git a/docs/experiments/toon_format_efficiency.md b/docs/experiments/toon_format_efficiency.md index 19bd6c5..dc426ae 100644 --- a/docs/experiments/toon_format_efficiency.md +++ b/docs/experiments/toon_format_efficiency.md @@ -50,9 +50,9 @@ Measure: - **Repair loops:** How many re-prompts needed for a valid parse? - **Token consumption:** prompt + completion tokens per successful generation. -### 4. End-to-end agent run +### 4. End-to-end cognitive pipeline run -Run the governed agent demo with `wireFormat: "toon"` vs `wireFormat: "compact-text"` vs `wireFormat: "json"` on the same objective. Compare: +Run the governed cognitive pipeline demo with `wireFormat: "toon"` vs `wireFormat: "compact-text"` vs `wireFormat: "json"` on the same objective. 
Compare: - Total prompt tokens across all LLM calls - Total completion tokens @@ -80,7 +80,7 @@ Fixture files: [`examples/toon/`](../../examples/toon/) | Reduction vs JSON (minified) | percentage | same formula | | Parse success rate | percentage | `fromToon` success / total attempts | | Repair loop count | integer | re-prompts until valid parse | -| Task completion rate | percentage | agent runs with correct final answer | +| Task completion rate | percentage | cognitive pipeline runs with correct final answer | | Total tokens per successful run | integer | sum of all LLM calls | ## Expected failure modes @@ -95,11 +95,11 @@ Fixture files: [`examples/toon/`](../../examples/toon/) # Static comparison (once fixture scripts are ready) npx tsx harness/examples/toon-benchmark.ts -# Governed agent with TOON -WIRE_FORMAT=toon npx tsx harness/examples/governed-demo.ts +# Governed cognitive pipeline with TOON +WIRE_FORMAT=toon npx tsx harness/examples/governed-pipeline-demo.ts -# Governed agent with compact-text (baseline) -WIRE_FORMAT=compact-text npx tsx harness/examples/governed-demo.ts +# Governed cognitive pipeline with compact-text (baseline) +WIRE_FORMAT=compact-text npx tsx harness/examples/governed-pipeline-demo.ts ``` ## Success criteria @@ -108,4 +108,4 @@ WIRE_FORMAT=compact-text npx tsx harness/examples/governed-demo.ts - TOON achieves at least 30% token reduction vs minified JSON for multi-step reasoning traces. - Round-trip validation passes for 100% of fixtures. - Parse success rate on model-generated TOON is at least 90% for GPT-4 class models without repair loops. -- No regression in task completion quality when governed agent uses `wireFormat: "toon"`. +- No regression in task completion quality when governed cognitive pipeline uses `wireFormat: "toon"`. 
diff --git a/docs/related-work.md b/docs/related-work.md index 48c1324..e54dde6 100644 --- a/docs/related-work.md +++ b/docs/related-work.md @@ -1,7 +1,7 @@ # 📚 Related Work: Structured Reasoning, Chain‑of‑Thought, and Agentic LLMs This document summarizes the major research threads that inform the **Open CoT Standard**. -It complements the annotated bibliography by explaining *how* each line of work influences the design of structured reasoning schemas, evaluation metrics, and agent loop protocols. +It complements the annotated bibliography by explaining *how* each line of work influences the design of structured reasoning schemas, evaluation metrics, and cognitive pipeline protocols. --- @@ -26,7 +26,7 @@ Interleaves *Thought* and *Action* steps, enabling tool use and environment inte **Impact:** - Schema includes `action`, `observation`, and `tool` fields. -- Supports agent loops that alternate between reasoning and acting. +- Supports cognitive pipelines that alternate between reasoning and acting. ### Reflexion Adds self‑critique and iterative refinement. 
diff --git a/docs/rfc-discussion-index.json b/docs/rfc-discussion-index.json index 7100fe2..73236a5 100644 --- a/docs/rfc-discussion-index.json +++ b/docs/rfc-discussion-index.json @@ -1,7 +1,7 @@ { "repository": "supernovae/open-cot", "category_slug": "ideas", - "count": 51, + "count": 53, "rfcs": { "0001": { "rfc_title": "Initial Structured Reasoning Schema", @@ -40,9 +40,9 @@ "discussion_url": "https://github.com/supernovae/open-cot/discussions/6" }, "0007": { - "rfc_title": "Agent Loop Protocol", - "rfc_path": "rfcs/0007-agent-loop-protocol.md", - "discussion_title": "RFC 0007 \u2014 Agent Loop Protocol", + "rfc_title": "Cognitive Pipeline Protocol", + "rfc_path": "rfcs/0007-cognitive-pipeline-protocol.md", + "discussion_title": "RFC 0007 \u2014 Cognitive Pipeline Protocol", "discussion_url": "https://github.com/supernovae/open-cot/discussions/7" }, "0008": { @@ -58,14 +58,14 @@ "discussion_url": "https://github.com/supernovae/open-cot/discussions/9" }, "0010": { - "rfc_title": "Agent Memory Schema", - "rfc_path": "rfcs/0010-agent-memory-schema.md", - "discussion_title": "RFC 0010 \u2014 Agent Memory Schema", + "rfc_title": "Cognitive Context Schema", + "rfc_path": "rfcs/0010-cognitive-context-schema.md", + "discussion_title": "RFC 0010 \u2014 Cognitive Context Schema", "discussion_url": "https://github.com/supernovae/open-cot/discussions/10" }, "0011": { "rfc_title": "Multi\u2011Agent Protocol", - "rfc_path": "rfcs/0011-multi-agent-protocol.md", + "rfc_path": "rfcs/0011-multi-party-cognition-protocol.md", "discussion_title": "RFC 0011 \u2014 Multi\u2011Agent Protocol", "discussion_url": "https://github.com/supernovae/open-cot/discussions/11" }, @@ -89,7 +89,7 @@ }, "0015": { "rfc_title": "Multi\u2011Agent Reward Sharing", - "rfc_path": "rfcs/0015-multi-agent-reward-sharing.md", + "rfc_path": "rfcs/0015-multi-party-reward-sharing.md", "discussion_title": "RFC 0015 \u2014 Multi\u2011Agent Reward Sharing", "discussion_url": 
"https://github.com/supernovae/open-cot/discussions/15" }, @@ -100,9 +100,9 @@ "discussion_url": "https://github.com/supernovae/open-cot/discussions/16" }, "0017": { - "rfc_title": "Agent Safety & Sandboxing", - "rfc_path": "rfcs/0017-agent-safety-sandboxing.md", - "discussion_title": "RFC 0017 \u2014 Agent Safety & Sandboxing", + "rfc_title": "Runtime Safety & Sandboxing", + "rfc_path": "rfcs/0017-runtime-safety-sandboxing.md", + "discussion_title": "RFC 0017 \u2014 Runtime Safety & Sandboxing", "discussion_url": "https://github.com/supernovae/open-cot/discussions/17" }, "0018": { @@ -113,7 +113,7 @@ }, "0019": { "rfc_title": "Multi\u2011Agent Planning Graphs", - "rfc_path": "rfcs/0019-multi-agent-planning-graphs.md", + "rfc_path": "rfcs/0019-collaborative-planning-graphs.md", "discussion_title": "RFC 0019 \u2014 Multi\u2011Agent Planning Graphs", "discussion_url": "https://github.com/supernovae/open-cot/discussions/19" }, @@ -124,15 +124,15 @@ "discussion_url": "https://github.com/supernovae/open-cot/discussions/20" }, "0021": { - "rfc_title": "Agent Capability Declaration", - "rfc_path": "rfcs/0021-agent-capability-declaration.md", - "discussion_title": "RFC 0021 \u2014 Agent Capability Declaration", + "rfc_title": "Capability Declaration", + "rfc_path": "rfcs/0021-capability-declaration.md", + "discussion_title": "RFC 0021 \u2014 Capability Declaration", "discussion_url": "https://github.com/supernovae/open-cot/discussions/21" }, "0022": { - "rfc_title": "Agent Evaluation Protocol", - "rfc_path": "rfcs/0022-agent-evaluation-protocol.md", - "discussion_title": "RFC 0022 \u2014 Agent Evaluation Protocol", + "rfc_title": "Cognitive Evaluation Protocol", + "rfc_path": "rfcs/0022-cognitive-evaluation-protocol.md", + "discussion_title": "RFC 0022 \u2014 Cognitive Evaluation Protocol", "discussion_url": "https://github.com/supernovae/open-cot/discussions/22" }, "0023": { @@ -154,57 +154,57 @@ "discussion_url": "https://github.com/supernovae/open-cot/discussions/25" }, 
"0026": { - "rfc_title": "Agent Identity & Authentication", - "rfc_path": "rfcs/0026-agent-identity-auth.md", - "discussion_title": "RFC 0026 \u2014 Agent Identity & Authentication", + "rfc_title": "Requester Identity & Authentication", + "rfc_path": "rfcs/0026-requester-identity-auth.md", + "discussion_title": "RFC 0026 \u2014 Requester Identity & Authentication", "discussion_url": "https://github.com/supernovae/open-cot/discussions/26" }, "0027": { - "rfc_title": "Distributed Agent Execution Protocol", - "rfc_path": "rfcs/0027-distributed-agent-execution-protocol.md", - "discussion_title": "RFC 0027 \u2014 Distributed Agent Execution Protocol", + "rfc_title": "Distributed Execution Protocol", + "rfc_path": "rfcs/0027-distributed-execution-protocol.md", + "discussion_title": "RFC 0027 \u2014 Distributed Execution Protocol", "discussion_url": "https://github.com/supernovae/open-cot/discussions/27" }, "0028": { - "rfc_title": "Agent\u2011to\u2011Environment Interaction Schema", - "rfc_path": "rfcs/0028-agent-to-environment-schema.md", - "discussion_title": "RFC 0028 \u2014 Agent\u2011to\u2011Environment Interaction Schema", + "rfc_title": "Capability Environment Interaction Schema", + "rfc_path": "rfcs/0028-capability-environment-schema.md", + "discussion_title": "RFC 0028 \u2014 Capability Environment Interaction Schema", "discussion_url": "https://github.com/supernovae/open-cot/discussions/28" }, "0029": { - "rfc_title": "Agent Benchmark Dataset Format", - "rfc_path": "rfcs/0029-agent-benchmark-dataset.md", - "discussion_title": "RFC 0029 \u2014 Agent Benchmark Dataset Format", + "rfc_title": "Cognitive Benchmark Dataset Format", + "rfc_path": "rfcs/0029-cognitive-benchmark-dataset.md", + "discussion_title": "RFC 0029 \u2014 Cognitive Benchmark Dataset Format", "discussion_url": "https://github.com/supernovae/open-cot/discussions/29" }, "0030": { - "rfc_title": "Agent Lifecycle & Versioning", - "rfc_path": 
"rfcs/0030-agent-lifecycle-versioning.md", - "discussion_title": "RFC 0030 \u2014 Agent Lifecycle & Versioning", + "rfc_title": "Runtime Lifecycle & Versioning", + "rfc_path": "rfcs/0030-runtime-lifecycle-versioning.md", + "discussion_title": "RFC 0030 \u2014 Runtime Lifecycle & Versioning", "discussion_url": "https://github.com/supernovae/open-cot/discussions/30" }, "0031": { - "rfc_title": "Agent Observability & Telemetry", - "rfc_path": "rfcs/0031-agent-observability-telemtry.md", - "discussion_title": "RFC 0031 \u2014 Agent Observability & Telemetry", + "rfc_title": "Cognitive Observability & Telemetry", + "rfc_path": "rfcs/0031-cognitive-observability-telemetry.md", + "discussion_title": "RFC 0031 \u2014 Cognitive Observability & Telemetry", "discussion_url": "https://github.com/supernovae/open-cot/discussions/31" }, "0032": { - "rfc_title": "Agent Deployment Manifest", - "rfc_path": "rfcs/0032-agent-deployment-manifest.md", - "discussion_title": "RFC 0032 \u2014 Agent Deployment Manifest", + "rfc_title": "Runtime Deployment Manifest", + "rfc_path": "rfcs/0032-runtime-deployment-manifest.md", + "discussion_title": "RFC 0032 \u2014 Runtime Deployment Manifest", "discussion_url": "https://github.com/supernovae/open-cot/discussions/32" }, "0033": { - "rfc_title": "Agent Security Posture & Threat Model", - "rfc_path": "rfcs/0033-agent-security-threat-model.md", - "discussion_title": "RFC 0033 \u2014 Agent Security Posture & Threat Model", + "rfc_title": "Runtime Security Posture & Threat Model", + "rfc_path": "rfcs/0033-runtime-security-threat-model.md", + "discussion_title": "RFC 0033 \u2014 Runtime Security Posture & Threat Model", "discussion_url": "https://github.com/supernovae/open-cot/discussions/33" }, "0034": { - "rfc_title": "Agent Federation Protocol", - "rfc_path": "rfcs/0034-agent-federation-protocol.md", - "discussion_title": "RFC 0034 \u2014 Agent Federation Protocol", + "rfc_title": "Cognitive Federation Protocol", + "rfc_path": 
"rfcs/0034-cognitive-federation-protocol.md", + "discussion_title": "RFC 0034 \u2014 Cognitive Federation Protocol", "discussion_url": "https://github.com/supernovae/open-cot/discussions/34" }, "0035": { @@ -214,9 +214,9 @@ "discussion_url": "https://github.com/supernovae/open-cot/discussions/35" }, "0036": { - "rfc_title": "Agent\u2011Native Compression & Delta Sync", - "rfc_path": "rfcs/0036-agent-native-compression-delta.md", - "discussion_title": "RFC 0036 \u2014 Agent\u2011Native Compression & Delta Sync", + "rfc_title": "Cognitive\u2011Native Compression & Delta Sync", + "rfc_path": "rfcs/0036-cognitive-native-compression-delta.md", + "discussion_title": "RFC 0036 \u2014 Cognitive\u2011Native Compression & Delta Sync", "discussion_url": "https://github.com/supernovae/open-cot/discussions/36" }, "0037": { @@ -239,7 +239,7 @@ }, "0040": { "rfc_title": "Multi\u2011Agent Economic Incentives", - "rfc_path": "rfcs/0040-multi-agent-economic-incentives.md", + "rfc_path": "rfcs/0040-multi-party-economic-incentives.md", "discussion_title": "RFC 0040 \u2014 Multi\u2011Agent Economic Incentives", "discussion_url": "https://github.com/supernovae/open-cot/discussions/40" }, @@ -268,9 +268,9 @@ "discussion_url": "https://github.com/supernovae/open-cot/discussions/44" }, "0045": { - "rfc_title": "Ethical & Risk Constraints for Reasoning Agents", - "rfc_path": "rfcs/0045-ethical-risk-contraints-reasoning-agents.md", - "discussion_title": "RFC 0045 \u2014 Ethical & Risk Constraints for Reasoning Agents", + "rfc_title": "Ethics & Risk Constraints for Cognitive Runtimes", + "rfc_path": "rfcs/0045-ethics-risk-constraints-cognitive-runtimes.md", + "discussion_title": "RFC 0045 \u2014 Ethics & Risk Constraints for Cognitive Runtimes", "discussion_url": "https://github.com/supernovae/open-cot/discussions/45" }, "0046": { diff --git a/docs/rfc-discussions.md b/docs/rfc-discussions.md index c1d5bff..05de98f 100644 --- a/docs/rfc-discussions.md +++ 
b/docs/rfc-discussions.md @@ -3,7 +3,7 @@ Canonical discussion threads for all Open CoT RFCs. Use these threads for normative feedback and design discussion. - Discussion policy: RFC feedback uses GitHub Discussions first; issues are for actionable tasks/bugs. -- Source mapping: `docs/rfc-discussion-index.json` (51 RFCs) +- Source mapping: `docs/rfc-discussion-index.json` (53 RFCs) | RFC | Topic | Discussion | |-----|-------|------------| @@ -13,45 +13,45 @@ Canonical discussion threads for all Open CoT RFCs. Use these threads for normat | [`RFC 0004`](../rfcs/0004-branching-reasoning-schema.md) | Branching Reasoning Extensions | [Open thread](https://github.com/supernovae/open-cot/discussions/4) | | [`RFC 0005`](../rfcs/0005-rl-reward-trace-schema.md) | RL Reward Trace Schema | [Open thread](https://github.com/supernovae/open-cot/discussions/5) | | [`RFC 0006`](../rfcs/0006-multi-verifier-ensemble-schema.md) | Multi‑Verifier Ensemble Schema | [Open thread](https://github.com/supernovae/open-cot/discussions/6) | -| [`RFC 0007`](../rfcs/0007-agent-loop-protocol.md) | Agent Loop Protocol | [Open thread](https://github.com/supernovae/open-cot/discussions/7) | +| [`RFC 0007`](../rfcs/0007-cognitive-pipeline-protocol.md) | Cognitive Pipeline Protocol | [Open thread](https://github.com/supernovae/open-cot/discussions/7) | | [`RFC 0008`](../rfcs/0008-dataset-packaging-standard.md) | Dataset Packaging Standard | [Open thread](https://github.com/supernovae/open-cot/discussions/8) | | [`RFC 0009`](../rfcs/0009-reward-fusion-specification.md) | Reward Fusion Specification | [Open thread](https://github.com/supernovae/open-cot/discussions/9) | -| [`RFC 0010`](../rfcs/0010-agent-memory-schema.md) | Agent Memory Schema | [Open thread](https://github.com/supernovae/open-cot/discussions/10) | -| [`RFC 0011`](../rfcs/0011-multi-agent-protocol.md) | Multi‑Agent Protocol | [Open thread](https://github.com/supernovae/open-cot/discussions/11) | +| [`RFC 
0010`](../rfcs/0010-cognitive-context-schema.md) | Cognitive Context Schema | [Open thread](https://github.com/supernovae/open-cot/discussions/10) | +| [`RFC 0011`](../rfcs/0011-multi-party-cognition-protocol.md) | Multi-Party Cognition Protocol | [Open thread](https://github.com/supernovae/open-cot/discussions/11) | | [`RFC 0012`](../rfcs/0012-dataset-streaming-format.md) | Dataset Streaming Format | [Open thread](https://github.com/supernovae/open-cot/discussions/12) | | [`RFC 0013`](../rfcs/0013-memory-compression-embedding.md) | Memory Compression & Embedding | [Open thread](https://github.com/supernovae/open-cot/discussions/13) | | [`RFC 0014`](../rfcs/0014-memory-conflict-resolution.md) | Memory Conflict Resolution | [Open thread](https://github.com/supernovae/open-cot/discussions/14) | -| [`RFC 0015`](../rfcs/0015-multi-agent-reward-sharing.md) | Multi‑Agent Reward Sharing | [Open thread](https://github.com/supernovae/open-cot/discussions/15) | +| [`RFC 0015`](../rfcs/0015-multi-party-reward-sharing.md) | Multi-Party Reward Sharing | [Open thread](https://github.com/supernovae/open-cot/discussions/15) | | [`RFC 0016`](../rfcs/0016-tool-capability-negotiation.md) | Tool Capability Negotiation | [Open thread](https://github.com/supernovae/open-cot/discussions/16) | -| [`RFC 0017`](../rfcs/0017-agent-safety-sandboxing.md) | Agent Safety & Sandboxing | [Open thread](https://github.com/supernovae/open-cot/discussions/17) | +| [`RFC 0017`](../rfcs/0017-runtime-safety-sandboxing.md) | Runtime Safety & Sandboxing | [Open thread](https://github.com/supernovae/open-cot/discussions/17) | | [`RFC 0018`](../rfcs/0018-tool-error-taxonomy.md) | Tool Error Taxonomy | [Open thread](https://github.com/supernovae/open-cot/discussions/18) | -| [`RFC 0019`](../rfcs/0019-multi-agent-planning-graphs.md) | Multi‑Agent Planning Graphs | [Open thread](https://github.com/supernovae/open-cot/discussions/19) | +| [`RFC 0019`](../rfcs/0019-collaborative-planning-graphs.md) | 
Collaborative Planning Graphs | [Open thread](https://github.com/supernovae/open-cot/discussions/19) | | [`RFC 0020`](../rfcs/0020-verifiable-scratchpad-compression.md) | Verifiable Scratchpad Compression | [Open thread](https://github.com/supernovae/open-cot/discussions/20) | -| [`RFC 0021`](../rfcs/0021-agent-capability-declaration.md) | Agent Capability Declaration | [Open thread](https://github.com/supernovae/open-cot/discussions/21) | -| [`RFC 0022`](../rfcs/0022-agent-evaluation-protocol.md) | Agent Evaluation Protocol | [Open thread](https://github.com/supernovae/open-cot/discussions/22) | +| [`RFC 0021`](../rfcs/0021-capability-declaration.md) | Capability Declaration | [Open thread](https://github.com/supernovae/open-cot/discussions/21) | +| [`RFC 0022`](../rfcs/0022-cognitive-evaluation-protocol.md) | Cognitive Evaluation Protocol | [Open thread](https://github.com/supernovae/open-cot/discussions/22) | | [`RFC 0023`](../rfcs/0023-humain-in-the-loop-schema.md) | Human‑in‑the‑Loop Feedback Schema | [Open thread](https://github.com/supernovae/open-cot/discussions/23) | | [`RFC 0024`](../rfcs/0024-multi-modal-reasoning-schema.md) | Multi‑Modal Reasoning Schema | [Open thread](https://github.com/supernovae/open-cot/discussions/24) | | [`RFC 0025`](../rfcs/0025-tool-marketplace-registry-format.md) | Tool Marketplace Registry Format | [Open thread](https://github.com/supernovae/open-cot/discussions/25) | -| [`RFC 0026`](../rfcs/0026-agent-identity-auth.md) | Agent Identity & Authentication | [Open thread](https://github.com/supernovae/open-cot/discussions/26) | -| [`RFC 0027`](../rfcs/0027-distributed-agent-execution-protocol.md) | Distributed Agent Execution Protocol | [Open thread](https://github.com/supernovae/open-cot/discussions/27) | -| [`RFC 0028`](../rfcs/0028-agent-to-environment-schema.md) | Agent‑to‑Environment Interaction Schema | [Open thread](https://github.com/supernovae/open-cot/discussions/28) | -| [`RFC 
0029`](../rfcs/0029-agent-benchmark-dataset.md) | Agent Benchmark Dataset Format | [Open thread](https://github.com/supernovae/open-cot/discussions/29) | -| [`RFC 0030`](../rfcs/0030-agent-lifecycle-versioning.md) | Agent Lifecycle & Versioning | [Open thread](https://github.com/supernovae/open-cot/discussions/30) | -| [`RFC 0031`](../rfcs/0031-agent-observability-telemtry.md) | Agent Observability & Telemetry | [Open thread](https://github.com/supernovae/open-cot/discussions/31) | -| [`RFC 0032`](../rfcs/0032-agent-deployment-manifest.md) | Agent Deployment Manifest | [Open thread](https://github.com/supernovae/open-cot/discussions/32) | -| [`RFC 0033`](../rfcs/0033-agent-security-threat-model.md) | Agent Security Posture & Threat Model | [Open thread](https://github.com/supernovae/open-cot/discussions/33) | -| [`RFC 0034`](../rfcs/0034-agent-federation-protocol.md) | Agent Federation Protocol | [Open thread](https://github.com/supernovae/open-cot/discussions/34) | +| [`RFC 0026`](../rfcs/0026-requester-identity-auth.md) | Requester Identity & Authentication | [Open thread](https://github.com/supernovae/open-cot/discussions/26) | +| [`RFC 0027`](../rfcs/0027-distributed-execution-protocol.md) | Distributed Execution Protocol | [Open thread](https://github.com/supernovae/open-cot/discussions/27) | +| [`RFC 0028`](../rfcs/0028-capability-environment-schema.md) | Capability Environment Interaction Schema | [Open thread](https://github.com/supernovae/open-cot/discussions/28) | +| [`RFC 0029`](../rfcs/0029-cognitive-benchmark-dataset.md) | Cognitive Benchmark Dataset Format | [Open thread](https://github.com/supernovae/open-cot/discussions/29) | +| [`RFC 0030`](../rfcs/0030-runtime-lifecycle-versioning.md) | Runtime Lifecycle & Versioning | [Open thread](https://github.com/supernovae/open-cot/discussions/30) | +| [`RFC 0031`](../rfcs/0031-cognitive-observability-telemetry.md) | Cognitive Observability & Telemetry | [Open 
thread](https://github.com/supernovae/open-cot/discussions/31) | +| [`RFC 0032`](../rfcs/0032-runtime-deployment-manifest.md) | Runtime Deployment Manifest | [Open thread](https://github.com/supernovae/open-cot/discussions/32) | +| [`RFC 0033`](../rfcs/0033-runtime-security-threat-model.md) | Runtime Security Posture & Threat Model | [Open thread](https://github.com/supernovae/open-cot/discussions/33) | +| [`RFC 0034`](../rfcs/0034-cognitive-federation-protocol.md) | Cognitive Federation Protocol | [Open thread](https://github.com/supernovae/open-cot/discussions/34) | | [`RFC 0035`](../rfcs/0035-data-provenance-tracking.md) | Data Provenance Tracking | [Open thread](https://github.com/supernovae/open-cot/discussions/35) | -| [`RFC 0036`](../rfcs/0036-agent-native-compression-delta.md) | Agent‑Native Compression & Delta Sync | [Open thread](https://github.com/supernovae/open-cot/discussions/36) | +| [`RFC 0036`](../rfcs/0036-cognitive-native-compression-delta.md) | Cognitive-Native Compression & Delta Sync | [Open thread](https://github.com/supernovae/open-cot/discussions/36) | | [`RFC 0037`](../rfcs/0037-token-economy-cost-modeling.md) | Token Economy & Cost Modeling | [Open thread](https://github.com/supernovae/open-cot/discussions/37) | | [`RFC 0038`](../rfcs/0038-cost-aware-reasoning-budget.md) | Cost‑Aware Reasoning & Budget Enforcement | [Open thread](https://github.com/supernovae/open-cot/discussions/38) | | [`RFC 0039`](../rfcs/0039-tool-cost-modeling-biling.md) | Tool Cost Modeling & Billing Semantics | [Open thread](https://github.com/supernovae/open-cot/discussions/39) | -| [`RFC 0040`](../rfcs/0040-multi-agent-economic-incentives.md) | Multi‑Agent Economic Incentives | [Open thread](https://github.com/supernovae/open-cot/discussions/40) | +| [`RFC 0040`](../rfcs/0040-multi-party-economic-incentives.md) | Multi-Party Economic Incentives | [Open thread](https://github.com/supernovae/open-cot/discussions/40) | | [`RFC 
0041`](../rfcs/0041-policy-enforcement-schema.md) | Policy Enforcement Schema | [Open thread](https://github.com/supernovae/open-cot/discussions/41) | | [`RFC 0042`](../rfcs/0042-permission-acl.md) | Permissions & Access Control | [Open thread](https://github.com/supernovae/open-cot/discussions/42) | | [`RFC 0043`](../rfcs/0043-auditing-compliance-logs.md) | Auditing & Compliance Logs | [Open thread](https://github.com/supernovae/open-cot/discussions/43) | | [`RFC 0044`](../rfcs/0044-governance-organizational-controls.md) | Governance & Organizational Controls | [Open thread](https://github.com/supernovae/open-cot/discussions/44) | -| [`RFC 0045`](../rfcs/0045-ethical-risk-contraints-reasoning-agents.md) | Ethical & Risk Constraints for Reasoning Agents | [Open thread](https://github.com/supernovae/open-cot/discussions/45) | +| [`RFC 0045`](../rfcs/0045-ethics-risk-constraints-cognitive-runtimes.md) | Ethics & Risk Constraints for Cognitive Runtimes | [Open thread](https://github.com/supernovae/open-cot/discussions/45) | | [`RFC 0046`](../rfcs/0046-conformance-interoperability-protocol.md) | Conformance & Interoperability Protocol | [Open thread](https://github.com/supernovae/open-cot/discussions/46) | | [`RFC 0047`](../rfcs/0047-delegation-extension.md) | Delegation Extension | [Open thread](https://github.com/supernovae/open-cot/discussions/47) | | [`RFC 0048`](../rfcs/0048-execution-receipts-audit-envelopes.md) | Execution Receipts & Audit Envelopes | [Open thread](https://github.com/supernovae/open-cot/discussions/48) | diff --git a/docs/rfc-e2e-gap-matrix.md b/docs/rfc-e2e-gap-matrix.md index a406dd7..dfc9f7e 100644 --- a/docs/rfc-e2e-gap-matrix.md +++ b/docs/rfc-e2e-gap-matrix.md @@ -22,9 +22,9 @@ This matrix maps current RFC coverage to the concrete functions needed for an en | Tool/verifier sidecars | RFC `0002`, `0003`, `0006` | Partial | Schema coverage + examples | Integrate into default evaluation pipeline | | Branching/search trace structures | RFC 
`0004` | Partial | Schema/examples only | Add scoring and harness paths for branch metrics | | Reward and reward fusion | RFC `0005`, `0009` | Partial | Schema/examples only | Add reward-aware evaluation and preference/RL training glue | -| Agent loop protocol | RFC `0007` | Partial | `reference/python/agent_loop_runner.py` mock loop | Wire into real eval/training traces and audit outputs | +| Cognitive pipeline loop protocol | RFC `0007` | Partial | `reference/python/cognitive_pipeline_runner.py` mock loop | Wire into real eval/training traces and audit outputs | | Dataset packaging/streaming | RFC `0008`, `0012` | Implemented | Packaging layouts, manifests, synthetic datasets | Add strict governance gates (dedup/contamination/provenance assertions) | -| Agent memory/safety/policy | RFC `0010`, `0017`, `0041`, `0045` | Partial | Schema-level docs, limited runtime checks | Enforce policy budgets/redaction/tool denial in eval scripts | +| Cognitive pipeline memory/safety/policy | RFC `0010`, `0017`, `0041`, `0045` | Partial | Schema-level docs, limited runtime checks | Enforce policy budgets/redaction/tool denial in eval scripts | | Evaluation reporting | RFC `0022`, `0029` | Partial | `benchmarks/scoring/scorer.py`, task specs | Add self-consistency, semantic step checks, harness adapter | | Identity/compliance/audit | RFC `0026`, `0043`, `0044` | Partial | Policies specified | Emit machine-readable audit events per run | | Observability + budgets + cost | RFC `0031`, `0037`, `0038`, `0039` | Gap | Mostly RFC text and examples | Add runtime counters and budget guardrails in execution | diff --git a/docs/token-efficiency.md b/docs/token-efficiency.md index d08c60b..6f677c3 100644 --- a/docs/token-efficiency.md +++ b/docs/token-efficiency.md @@ -20,9 +20,9 @@ That split—verbose canonical JSON off the hot path, terse briefing on the hot **Where this meets code:** the governed execution loop, transitions, and trace/state plumbing live under 
[`harness/`](../harness/README.md). Useful entry points: -- [`harness/src/agents/governed-agent.ts`](../harness/src/agents/governed-agent.ts) — end-to-end governed FSM with delegation and receipts +- [`harness/src/pipelines/governed-pipeline.ts`](../harness/src/pipelines/governed-pipeline.ts) — end-to-end governed FSM with delegation and receipts - [`harness/src/core/transitions.ts`](../harness/src/core/transitions.ts) — phase changes and harness-driven state -- [`harness/src/core/state.ts`](../harness/src/core/state.ts) — `AgentState` and trace accumulation +- [`harness/src/core/state.ts`](../harness/src/core/state.ts) — `PipelineState` and trace accumulation - [`harness/src/core/tool-registry.ts`](../harness/src/core/tool-registry.ts) — registered tools and contracts feeding compilation Manifest compilation should draw from the same sources the enforcement path trusts (registry, sandbox config, active policies, budget tracker), not from prose the model invented. That keeps the briefing aligned with what will actually happen when a tool call is attempted. @@ -56,7 +56,7 @@ TOON sits between Tier 2 (ad-hoc markers) and Tier 3 (new serialization language - **JSON Schema stays normative.** TOON is a serialization adapter, not a schema language. All validation, audit, and interchange remain JSON. - **Round-trip fidelity.** `fromToon(toToon(obj, schema), schema)` must produce the same validated object. The adapter is not a trust boundary. - **Inline guardrails.** The `[N]` length marker and `{fields}` header tell the model exactly how many items to generate and which keys to use, reducing hallucinated structure. -- **Opt-in via `wire_format`.** Set `wire_format: "toon"` on agent config; default remains `"compact-text"` for backward compatibility. +- **Opt-in via `wire_format`.** Set `wire_format: "toon"` on cognitive pipeline config; default remains `"compact-text"` for backward compatibility. 
Example — the capability manifest in TOON vs compact text: @@ -75,7 +75,7 @@ constraints: max 5 results per search; no raw HTML The TOON form for this manifest uses roughly 30–40% fewer tokens than the equivalent JSON, and is comparable or slightly more compact than the hand-coded compact text — with the advantage that the adapter is reusable across any schema, not just manifests. -**Implementation:** [`harness/src/adapters/toon-adapter.ts`](../harness/src/adapters/toon-adapter.ts) provides `toToon`, `fromToon`, and `schemaToToonHeader`. The manifest builder ([`harness/src/governance/manifest-builder.ts`](../harness/src/governance/manifest-builder.ts)) adds `manifestToToon` and a `serializeManifest` dispatcher. Both the governed agent and chat agent accept a `wireFormat` config option. +**Implementation:** [`harness/src/adapters/toon-adapter.ts`](../harness/src/adapters/toon-adapter.ts) provides `toToon`, `fromToon`, and `schemaToToonHeader`. The manifest builder ([`harness/src/governance/manifest-builder.ts`](../harness/src/governance/manifest-builder.ts)) adds `manifestToToon` and a `serializeManifest` dispatcher. Both the governed cognitive pipeline and chat cognitive pipeline accept a `wireFormat` config option. **Research backing:** @@ -105,7 +105,7 @@ Models often reason better with room to be verbose. Control-plane metadata does Concrete directions: - **Observation summarization.** A search tool might return hundreds of tokens of noisy HTML. The harness could extract entities, numbers, and citations into a short fact list before `observe_result` reaches the model, while the full payload remains attached to the trace step or external object storage for auditors. Summarization could be heuristic (strip tags), model-based (a cheap summarizer), or policy-driven (only whitelisted fields). -- **Trace windowing.** `AgentState` can retain the full run; the model-facing context might show only the last *N* tool cycles or the last *M* tokens of narration. 
Long multi-tool runs otherwise drown in their own history; windowing is likely mandatory for hour-scale tasks even when total context fits in theory. +- **Trace windowing.** `PipelineState` can retain the full run; the model-facing context might show only the last *N* tool cycles or the last *M* tokens of narration. Long multi-tool runs otherwise drown in their own history; windowing is likely mandatory for hour-scale tasks even when total context fits in theory. - **Metadata stripping.** Delegation request ids, integrity hashes, receipt fields, and other audit-only columns need not be echoed back to the model. Keep them in structured state and in sealed envelopes, not in the prompt loop. The model needs *enough* correlation to refer to “the last search,” not the full cryptographic tail. - **Phase-aware injection.** At `plan`, surface the objective plus a fresh capability manifest so proposals stay feasible. At `critique_verify`, bias toward the latest observation, the manifest refresh (budget and revocation changes), and a compact summary of prior conclusions—rather than replaying the entire thread from `receive`. - **Deterministic replay vs model context.** Anything you strip from the prompt must still be reconstructable for debugging. Document whether summarization is **reversible** (lossless compression) or **interpretive** (lossy), because critique quality depends on which you chose. @@ -162,7 +162,7 @@ These are not homework problems with known answers; they are gaps we expect to c - Can observation summarization preserve enough detail for accurate `critique_verify`, or does summarization systematically hide the faults reviewers need to catch? - What is the **minimum** viable context for a `critique_verify` step on your tasks—objective only, last observation only, manifest refresh only? - When native `tool_calls` are available, does switching away from JSON-in-prose materially change **total** tokens once system prompts and tool schemas are included? 
-- For multi-agent or delegated subgraphs, which metadata is safe to strip without breaking the child’s correlation to parent receipts? +- For multi-party or delegated subgraphs, which metadata is safe to strip without breaking the child’s correlation to parent receipts? --- @@ -170,7 +170,7 @@ These are not homework problems with known answers; they are gaps we expect to c If you run small models locally, your measurements matter more than our guesses. -- Run the governed agent demo paths in [`harness/examples/`](../harness/examples/) and capture token usage (`prompt_tokens`, `completion_tokens`, and per-phase estimates if your wrapper exposes them). +- Run the governed cognitive pipeline demo paths in [`harness/examples/`](../harness/examples/) and capture token usage (`prompt_tokens`, `completion_tokens`, and per-phase estimates if your wrapper exposes them). - Try context compilation ideas—summaries, windowing, stripped metadata—and report what broke and what held. Attach redacted prompts if you can. - Benchmark structured text markers vs JSON for **your** model and tool set; share model id, quantization, temperature, and rough numbers—not just one cherry-picked success. - Propose compact formats with a clear mapping to existing schemas and an estimate of token savings **including** failure cases. 
diff --git a/examples/agent_loop/example1.json b/examples/cognitive_pipeline/example1.json similarity index 100% rename from examples/agent_loop/example1.json rename to examples/cognitive_pipeline/example1.json diff --git a/examples/delegation_extension/example1.json b/examples/delegation_extension/example1.json index 58c61f3..a2a80a0 100644 --- a/examples/delegation_extension/example1.json +++ b/examples/delegation_extension/example1.json @@ -2,7 +2,7 @@ "delegation_request": { "schema_version": "0.2", "request_id": "req-a1b2c3d4", - "requester": "agent-planner-01", + "requester": "cognitive-pipeline-planner-01", "run_id": "run-e5f6g7h8", "intent": "Read email headers to find meeting invitations", "justification": "The user asked me to check for upcoming meetings.", diff --git a/examples/execution_receipts_audit_envelopes/example-envelope.json b/examples/execution_receipts_audit_envelopes/example-envelope.json index 6ec257c..51d8a64 100644 --- a/examples/execution_receipts_audit_envelopes/example-envelope.json +++ b/examples/execution_receipts_audit_envelopes/example-envelope.json @@ -2,7 +2,7 @@ "schema_version": "0.2", "envelope_id": "11111111-2222-4333-8444-555555555555", "run_id": "run-e5f6g7h8", - "agent_id": "agent-planner-01", + "requester_id": "cognitive-pipeline-planner-01", "task_hash": "3c9e2a1f7b0d6e5c8a4f2b1d9e8c7a6f5b4d3e2c1a0f9e8d7c6b5a4f3e2d1c0b", "started_at": "2026-04-18T14:30:00Z", "completed_at": "2026-04-18T14:30:05Z", diff --git a/examples/permission_acl/example1.json b/examples/permission_acl/example1.json index 95a265d..4a8d5a8 100644 --- a/examples/permission_acl/example1.json +++ b/examples/permission_acl/example1.json @@ -1,6 +1,6 @@ { "permission_id": "perm-q7r8s9t0", - "granted_to": "agent-planner-01", + "granted_to": "cognitive-pipeline-planner-01", "scope": { "resource": "tool:email", "action": "read", diff --git a/examples/toon/capability-manifest.json b/examples/toon/capability-manifest.json index a7846c5..e83cf07 100644 --- 
a/examples/toon/capability-manifest.json +++ b/examples/toon/capability-manifest.json @@ -1,7 +1,7 @@ { "manifest_id": "cm_01jqzexample0001", "run_id": "run_8f3c2a", - "agent_id": "agent_researcher_eu", + "requester_id": "agent_researcher_eu", "timestamp": "2026-04-18T14:22:05Z", "phase": "frame", "tools": { diff --git a/harness/README.md b/harness/README.md index f674c63..c92a846 100644 --- a/harness/README.md +++ b/harness/README.md @@ -1,17 +1,17 @@ -# Open CoT Reference Harness +# Open CoT Core Reference Package -A TypeScript reference implementation that **proves the Open CoT standard is executable, testable, and operational**. The harness emits, consumes, and validates RFC-compliant reasoning traces — making the schema feel like a contract, not just documentation. +A TypeScript reference implementation that **proves the Open CoT standard is executable, testable, and operational**. The core package emits, consumes, and validates RFC-compliant reasoning traces — making the schema feel like a contract, not just documentation. ## Bidirectional verification -The harness and the schema verify each other: +The core package and the schema verify each other: | Direction | What it proves | |-----------|----------------| -| **Schema verifies harness** | Forces valid event structure, consistent state transitions, budget accounting, tool result shape, completion criteria, and replayability | -| **Harness verifies schema** | Proves the schema is sufficient, ergonomic, debuggable, and works under real agent loops | +| **Schema verifies package output** | Forces valid event structure, consistent state transitions, budget accounting, tool result shape, completion criteria, and replayability | +| **Package verifies schema** | Proves the schema is sufficient, ergonomic, debuggable, and works under real cognitive pipelines | -This feedback loop catches schema gaps early — if the harness can't express a real-world pattern, the schema needs updating. 
+This feedback loop catches schema gaps early — if the core package can't express a real-world pattern, the schema needs updating. ## Architecture @@ -19,7 +19,7 @@ This feedback loop catches schema gaps early — if the harness can't express a src/ schemas/ TypeScript types mirroring the JSON Schemas (RFC 0001, 0003, 0007, 0017, 0031, 0038) core/ - state.ts Agent state: objective, phase, budgets, evidence, trace + state.ts Cognitive pipeline state: objective, phase, budgets, evidence, trace transitions.ts FSM engine: plan -> inspect -> act -> verify -> repair -> summarize -> stop budget-tracker Token, cost, step, tool-call, retry budgets with exhaustion handling trace-emitter Structured step emission for every transition and action @@ -31,10 +31,10 @@ src/ openai-compat Any OpenAI-compatible API: OpenAI, Ollama, vLLM, LiteLLM tools/ mock-tools.ts search, calculator, readFile, writeFile, runTests - agents/ - chat-agent.ts Conversational loop with policy-mediated authority checks - coder-agent.ts Coder loop with policy-mediated authority + repair - governed-agent.ts Full RFC 0007 governed flow with receipts + audit sealing + pipelines/ + chat-pipeline.ts Conversational loop with policy-mediated authority checks + coder-pipeline.ts Coder loop with policy-mediated authority + repair + governed-pipeline.ts Full RFC 0007 governed flow with receipts + audit sealing ``` ## Quick start @@ -47,29 +47,29 @@ npm install npm test ``` -### Run the chat agent demo +### Run the chat cognitive pipeline demo ```bash -npx tsx examples/chat-demo.ts +npx tsx examples/chat-pipeline-demo.ts ``` -### Run the coder agent demo +### Run the coder cognitive pipeline demo ```bash -npx tsx examples/coder-demo.ts +npx tsx examples/coder-pipeline-demo.ts ``` -### Run the governed agent demo +### Run the governed cognitive pipeline demo ```bash -npx tsx examples/governed-demo.ts +npx tsx examples/governed-pipeline-demo.ts ``` Policy modes: ```bash -npx tsx examples/governed-demo.ts --deny "search 
for info" -npx tsx examples/governed-demo.ts --narrow "search for info" +npx tsx examples/governed-pipeline-demo.ts --deny "search for info" +npx tsx examples/governed-pipeline-demo.ts --narrow "search for info" ``` ### Choose a policy engine for governed demo @@ -80,14 +80,14 @@ Use `POLICY_ENGINE`: - `opa`: sends delegation requests to OPA and maps decisions into Open CoT objects ```bash -POLICY_ENGINE=inprocess npx tsx examples/governed-demo.ts +POLICY_ENGINE=inprocess npx tsx examples/governed-pipeline-demo.ts ``` ```bash POLICY_ENGINE=opa \ OPA_BASE_URL=http://127.0.0.1:8181 \ OPA_POLICY_PATH=open_cot/delegation \ -npx tsx examples/governed-demo.ts +npx tsx examples/governed-pipeline-demo.ts ``` Optional OPA env vars: @@ -117,14 +117,14 @@ npm run test:opa-live ## Runtime governance guarantees -Current harness behavior (runtime, not just schema/docs): +Current core package behavior (runtime, not just schema/docs): -- **Policy mediation for all shipped agents**: `chat-agent`, `coder-agent`, and `governed-agent` route tool execution through a `DelegationPolicyEngine` before dispatch. +- **Policy mediation for all shipped pipelines**: `chat-pipeline`, `coder-pipeline`, and `governed-pipeline` route tool execution through a `DelegationPolicyEngine` before dispatch. - **Dispatch-time least privilege enforcement**: tool arguments are schema-validated and checked against delegated scope constraints (`allowed_fields`, `excluded_fields`, `max_results`) in `ToolRegistry`. - **Phase consultation checks**: policy consultation hooks are enforced at `frame`, `plan`, `observe_result`, `critique_verify`, and `finalize`. - **Manifest/policy reconciliation**: capability manifests can be compiled from policy-engine tool previews (including OPA-backed decisions), so model-visible tool posture reflects live policy outcomes. -`chat-agent` and `coder-agent` default to an in-process policy derived from sandbox allow/block lists. 
You can override this by passing explicit `policies` and/or a custom `policyEngine`. +`chat-pipeline` and `coder-pipeline` default to an in-process policy derived from sandbox allow/block lists. You can override this by passing explicit `policies` and/or a custom `policyEngine`. ### Use a real LLM (Ollama example) @@ -133,19 +133,19 @@ Current harness behavior (runtime, not just schema/docs): ollama serve & ollama pull qwen2.5:1.5b -# Point the harness at Ollama -OPENAI_BASE_URL=http://localhost:11434/v1 npx tsx examples/chat-demo.ts "What is the capital of France?" +# Point the core package at Ollama +OPENAI_BASE_URL=http://localhost:11434/v1 npx tsx examples/chat-pipeline-demo.ts "What is the capital of France?" ``` ### Use OpenAI ```bash -OPENAI_API_KEY=sk-... OPENAI_MODEL=gpt-4o-mini npx tsx examples/chat-demo.ts "Explain recursion" +OPENAI_API_KEY=sk-... OPENAI_MODEL=gpt-4o-mini npx tsx examples/chat-pipeline-demo.ts "Explain recursion" ``` ## FSM transition map -Every agent follows this finite state machine. Every transition emits a trace event. +Every cognitive pipeline follows this finite state machine. Every transition emits a trace event. ``` plan ──┬──> inspect ──┬──> act ──> verify ──┬──> summarize ──> plan (loop) @@ -177,18 +177,18 @@ The budget tracker (RFC 0038) enforces: - **Tool-call budget** — maximum tool invocations - **Retry budget** — maximum repair attempts -When any hard-enforced budget hits zero, the agent is force-stopped with `budget_exhausted` status and the trace records why. +When any hard-enforced budget hits zero, the cognitive pipeline is force-stopped with `budget_exhausted` status and the trace records why. 
### Streaming decode circuit breaker -The harness now enforces token/safety limits during streamed decoding (not only after full responses): +The core package now enforces token/safety limits during streamed decoding (not only after full responses): - **Preflight budget gate**: estimate prompt token cost before each model call; if insufficient remaining budget, stop before decode starts. - **Mid-stream token breaker**: stream callbacks track emitted completion tokens and abort decode once the remaining completion allowance is exhausted. - **Mid-stream safety breaker**: stream callbacks can stop runaway or unsafe output patterns and route to `fail_safe`. - **FSM-first shutdown**: on breaker trip, the run is forced into terminal state (`budget_exhausted`, `fail_safe`, or `external_stop`) before any subsequent tool side effects. -This keeps authority in the harness FSM even when a model ignores budget instructions. +This keeps authority in the core package FSM even when a model ignores budget instructions. ## Tool contracts @@ -212,7 +212,7 @@ Sandbox policy (RFC 0017) controls which tools are allowed/blocked at runtime. ## Adding new tools ```typescript -import { ToolRegistry, defineToolContract } from "@open-cot/harness"; +import { ToolRegistry, defineToolContract } from "@open-cot/core"; const registry = new ToolRegistry(); registry.register( @@ -227,11 +227,11 @@ registry.register( ); ``` -## Adding new agents +## Adding new pipelines -Create a new file in `src/agents/` that: +Create a new file in `src/pipelines/` that: -1. Creates state with `createAgentState()` +1. Creates state with `createPipelineState()` 2. Uses `transition()` to move through FSM phases 3. Uses `emitPlan/emitAction/emitObservation/emitVerify/emitSummary` to build the trace 4. 
Uses `createBudgetTracker()` to track resource usage @@ -241,11 +241,11 @@ The FSM engine prevents invalid transitions and the validator confirms the outpu ## Cross-language validation -Traces emitted by the TypeScript harness can be validated by the Python tooling: +Traces emitted by the TypeScript core package can be validated by the Python tooling: ```bash -# Save a trace from the harness -npx tsx examples/chat-demo.ts > trace.json +# Save a trace from the core package +npx tsx examples/chat-pipeline-demo.ts > trace.json # Validate with the Python validator python tools/validate.py --trace trace.json @@ -253,7 +253,7 @@ python tools/validate.py --trace trace.json This proves the schema contract works across implementations. -## What the harness demonstrates +## What the core package demonstrates - Standard-compliant trace emission (RFC 0001 + RFC 0007) - Tool call / observation pairing (RFC 0003) diff --git a/harness/examples/chat-demo.ts b/harness/examples/chat-pipeline-demo.ts similarity index 78% rename from harness/examples/chat-demo.ts rename to harness/examples/chat-pipeline-demo.ts index bce7aad..fbbf945 100644 --- a/harness/examples/chat-demo.ts +++ b/harness/examples/chat-pipeline-demo.ts @@ -1,17 +1,17 @@ #!/usr/bin/env npx tsx /** - * Chat agent demo — run with `npx tsx examples/chat-demo.ts` + * Chat cognitive pipeline demo — run with `npx tsx examples/chat-pipeline-demo.ts` * * By default uses the mock backend (zero external deps). * Set OPENAI_BASE_URL and OPENAI_API_KEY to use a real LLM. 
* * Examples: - * npx tsx examples/chat-demo.ts # mock backend + * npx tsx examples/chat-pipeline-demo.ts # mock backend * OPENAI_BASE_URL=http://localhost:11434/v1 \ - * npx tsx examples/chat-demo.ts # Ollama + * npx tsx examples/chat-pipeline-demo.ts # Ollama */ -import { runChatAgent } from "../src/agents/chat-agent.js"; +import { runChatPipeline } from "../src/pipelines/chat-pipeline.js"; import { MockLLMBackend } from "../src/backends/mock.js"; import { OpenAICompatBackend } from "../src/backends/openai-compat.js"; import { createMockToolRegistry } from "../src/tools/mock-tools.js"; @@ -31,11 +31,11 @@ async function main() { const question = process.argv[2] ?? "Search for the population of Tokyo and calculate its square root."; - console.log(`\n--- Chat Agent Demo ---`); + console.log(`\n--- Chat Cognitive pipeline Demo ---`); console.log(`Question: ${question}\n`); const backend = pickBackend(); - const trace = await runChatAgent(backend, question, createMockToolRegistry()); + const trace = await runChatPipeline(backend, question, createMockToolRegistry()); console.log(`Answer: ${trace.final_answer}`); console.log(`\nCompletion: ${trace.termination ?? "unknown"}`); diff --git a/harness/examples/coder-demo.ts b/harness/examples/coder-pipeline-demo.ts similarity index 81% rename from harness/examples/coder-demo.ts rename to harness/examples/coder-pipeline-demo.ts index 32118e1..633d61f 100644 --- a/harness/examples/coder-demo.ts +++ b/harness/examples/coder-pipeline-demo.ts @@ -1,6 +1,6 @@ #!/usr/bin/env npx tsx /** - * Coder agent demo — run with `npx tsx examples/coder-demo.ts` + * Coder cognitive pipeline demo — run with `npx tsx examples/coder-pipeline-demo.ts` * * Demonstrates the full plan-do-act FSM: plan -> inspect -> act -> verify -> * (repair loop) -> summarize -> stop. @@ -9,12 +9,12 @@ * Set OPENAI_BASE_URL and OPENAI_API_KEY to use a real LLM. 
* * Examples: - * npx tsx examples/coder-demo.ts # mock backend + * npx tsx examples/coder-pipeline-demo.ts # mock backend * OPENAI_BASE_URL=http://localhost:11434/v1 \ - * npx tsx examples/coder-demo.ts # Ollama + * npx tsx examples/coder-pipeline-demo.ts # Ollama */ -import { runCoderAgent } from "../src/agents/coder-agent.js"; +import { runCoderPipeline } from "../src/pipelines/coder-pipeline.js"; import { MockLLMBackend } from "../src/backends/mock.js"; import { OpenAICompatBackend } from "../src/backends/openai-compat.js"; import { createMockToolRegistry } from "../src/tools/mock-tools.js"; @@ -35,11 +35,11 @@ async function main() { process.argv[2] ?? "Read the file src/main.ts, add error handling, write the changes, and verify with tests."; - console.log(`\n--- Coder Agent Demo ---`); + console.log(`\n--- Coder Cognitive pipeline Demo ---`); console.log(`Task: ${task}\n`); const backend = pickBackend(); - const trace = await runCoderAgent(backend, task, createMockToolRegistry()); + const trace = await runCoderPipeline(backend, task, createMockToolRegistry()); console.log(`Answer: ${trace.final_answer}`); console.log(`\nCompletion: ${trace.termination ?? "unknown"}`); diff --git a/harness/examples/governed-demo.ts b/harness/examples/governed-pipeline-demo.ts similarity index 90% rename from harness/examples/governed-demo.ts rename to harness/examples/governed-pipeline-demo.ts index 92eb629..fa2686f 100644 --- a/harness/examples/governed-demo.ts +++ b/harness/examples/governed-pipeline-demo.ts @@ -1,6 +1,6 @@ #!/usr/bin/env npx tsx /** - * Governed agent demo — run with `npx tsx examples/governed-demo.ts` + * Governed cognitive pipeline demo — run with `npx tsx examples/governed-pipeline-demo.ts` * * Shows the full governed execution flow: the model requests permission, * the policy engine evaluates, the auth broker narrows scope, tools @@ -8,10 +8,10 @@ * envelope. 
* * Try different scenarios: - * npx tsx examples/governed-demo.ts # search (allowed) - * npx tsx examples/governed-demo.ts "calculate 2+2" # calculator (allowed) - * npx tsx examples/governed-demo.ts "search for open source" # search (allowed) - * npx tsx examples/governed-demo.ts --deny "search for info" # search (denied by policy) + * npx tsx examples/governed-pipeline-demo.ts # search (allowed) + * npx tsx examples/governed-pipeline-demo.ts "calculate 2+2" # calculator (allowed) + * npx tsx examples/governed-pipeline-demo.ts "search for open source" # search (allowed) + * npx tsx examples/governed-pipeline-demo.ts --deny "search for info" # search (denied by policy) * * Policy engine selection via env: * POLICY_ENGINE=inprocess|opa @@ -24,8 +24,8 @@ * OPA_FALLBACK_INPROCESS=true|false */ -import { runGovernedAgent } from "../src/agents/governed-agent.js"; -import type { GovernedAgentConfig } from "../src/agents/governed-agent.js"; +import { runGovernedPipeline } from "../src/pipelines/governed-pipeline.js"; +import type { GovernedPipelineConfig } from "../src/pipelines/governed-pipeline.js"; import { MockLLMBackend } from "../src/backends/mock.js"; import { OpenAICompatBackend } from "../src/backends/openai-compat.js"; import { createMockToolRegistry } from "../src/tools/mock-tools.js"; @@ -163,21 +163,21 @@ async function main() { } const policyEngineSelection = pickPolicyEngine(policies, policyMode); - console.log(`\n--- Governed Agent Demo ---`); + console.log(`\n--- Governed Cognitive pipeline Demo ---`); console.log(`Policy mode: ${mode}`); console.log(`Policy engine: ${policyEngineSelection.engineLabel}`); console.log(`Question: ${question}\n`); - const config: GovernedAgentConfig = { + const config: GovernedPipelineConfig = { objective: question, backend: pickBackend(), toolRegistry: createMockToolRegistry(), policies: policyEngineSelection.manifestPolicies, policyEngine: policyEngineSelection.engine, - agentId: "demo-agent-01", + requesterId: 
"demo-cognitive-pipeline-01", }; - const { trace, envelope, state } = await runGovernedAgent(config); + const { trace, envelope, state } = await runGovernedPipeline(config); console.log(`\n--- Result ---`); console.log(`Answer: ${trace.final_answer}`); diff --git a/harness/examples/opa/README.md b/harness/examples/opa/README.md index d5cea86..c83691f 100644 --- a/harness/examples/opa/README.md +++ b/harness/examples/opa/README.md @@ -18,7 +18,7 @@ cd harness POLICY_ENGINE=opa \ OPA_BASE_URL=http://127.0.0.1:8181 \ OPA_POLICY_PATH=open_cot/delegation \ -npx tsx examples/governed-demo.ts +npx tsx examples/governed-pipeline-demo.ts ``` ## Try policy modes @@ -34,11 +34,11 @@ Examples: ```bash # deny search requests POLICY_ENGINE=opa OPA_BASE_URL=http://127.0.0.1:8181 \ - npx tsx examples/governed-demo.ts --deny "search for open source" + npx tsx examples/governed-pipeline-demo.ts --deny "search for open source" # narrow search requests POLICY_ENGINE=opa OPA_BASE_URL=http://127.0.0.1:8181 \ - npx tsx examples/governed-demo.ts --narrow "search for open source" + npx tsx examples/governed-pipeline-demo.ts --narrow "search for open source" ``` ## Response contract expected by harness diff --git a/harness/package-lock.json b/harness/package-lock.json index 3d61579..b7982fa 100644 --- a/harness/package-lock.json +++ b/harness/package-lock.json @@ -1,11 +1,11 @@ { - "name": "@open-cot/harness", + "name": "@open-cot/core", "version": "0.1.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "@open-cot/harness", + "name": "@open-cot/core", "version": "0.1.0", "license": "MIT", "dependencies": { diff --git a/harness/package.json b/harness/package.json index fb6c5a7..fdfa1be 100644 --- a/harness/package.json +++ b/harness/package.json @@ -1,7 +1,7 @@ { - "name": "@open-cot/harness", + "name": "@open-cot/core", "version": "0.1.0", - "description": "Reference harness implementation for the Open CoT standard — emits, consumes, and validates RFC-compliant 
reasoning traces", + "description": "Reference core package for Open CoT cognitive artifacts, interfaces, policy boundaries, and reconciliation traces", "type": "module", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -11,15 +11,17 @@ "test:opa-live": "OPA_BASE_URL=${OPA_BASE_URL:-http://127.0.0.1:8181} OPA_POLICY_PATH=${OPA_POLICY_PATH:-open_cot/delegation} OPA_LIVE_POLICY_MODE=${OPA_LIVE_POLICY_MODE:-allow} vitest run tests/policy-engine-live.test.ts", "test:watch": "vitest", "typecheck": "tsc --noEmit", - "chat-demo": "tsx examples/chat-demo.ts", - "coder-demo": "tsx examples/coder-demo.ts" + "chat-pipeline-demo": "tsx examples/chat-pipeline-demo.ts", + "coder-pipeline-demo": "tsx examples/coder-pipeline-demo.ts", + "governed-pipeline-demo": "tsx examples/governed-pipeline-demo.ts" }, "keywords": [ "open-cot", "chain-of-thought", "reasoning", - "agent", - "harness" + "cognitive-runtime", + "policy", + "reconciliation" ], "license": "MIT", "devDependencies": { diff --git a/harness/src/agents/index.ts b/harness/src/agents/index.ts deleted file mode 100644 index 7515344..0000000 --- a/harness/src/agents/index.ts +++ /dev/null @@ -1,7 +0,0 @@ -export { runChatAgent } from "./chat-agent.js"; -export { runCoderAgent } from "./coder-agent.js"; -export { runGovernedAgent } from "./governed-agent.js"; -export type { ChatGovernanceOptions } from "./chat-agent.js"; -export type { CoderGovernanceOptions } from "./coder-agent.js"; -export type { GovernedAgentConfig, GovernedAgentResult } from "./governed-agent.js"; -export type { WireFormat } from "../governance/manifest-builder.js"; diff --git a/harness/src/backends/types.ts b/harness/src/backends/types.ts index ad9ba23..5ff83da 100644 --- a/harness/src/backends/types.ts +++ b/harness/src/backends/types.ts @@ -1,7 +1,7 @@ /** * LLM backend interface — abstraction over model providers. 
* - * Both mock and real backends implement this interface so agents can switch + * Both mock and real backends implement this interface so pipelines can switch * between deterministic testing and real inference without code changes. */ diff --git a/harness/src/core/budget-tracker.ts b/harness/src/core/budget-tracker.ts index 77ba957..eff07be 100644 --- a/harness/src/core/budget-tracker.ts +++ b/harness/src/core/budget-tracker.ts @@ -2,7 +2,7 @@ * Budget tracker — RFC 0038 (Cost-Aware Reasoning Budget). * * Maintains running totals for token, cost, step, tool-call, and retry budgets. - * When any hard-enforced budget is exhausted the tracker signals that the agent + * When any hard-enforced budget is exhausted the tracker signals that the cognitive pipeline * must stop. */ @@ -11,15 +11,15 @@ import type { BudgetSnapshot, BudgetChangeEvent, } from "../schemas/budget.js"; -import type { AgentState } from "./state.js"; +import type { PipelineState } from "./state.js"; import { forceStop } from "./transitions.js"; export interface BudgetTracker { - recordTokens(state: AgentState, count: number, reason: string): void; - recordCost(state: AgentState, amount: number, reason: string): void; - recordStep(state: AgentState, reason: string): void; - recordToolCall(state: AgentState, reason: string): void; - recordRetry(state: AgentState, reason: string): void; + recordTokens(state: PipelineState, count: number, reason: string): void; + recordCost(state: PipelineState, amount: number, reason: string): void; + recordStep(state: PipelineState, reason: string): void; + recordToolCall(state: PipelineState, reason: string): void; + recordRetry(state: PipelineState, reason: string): void; isExhausted(snapshot: BudgetSnapshot, policy: BudgetPolicy): string | null; getEvents(): readonly BudgetChangeEvent[]; } @@ -42,7 +42,7 @@ export function createBudgetTracker(): BudgetTracker { }); } - function checkExhaustion(state: AgentState): void { + function checkExhaustion(state: 
PipelineState): void { const reason = isExhausted(state.budget, state.budgetPolicy); if (reason && state.budgetPolicy.enforcement === "hard") { forceStop(state, "budget_exhausted", reason); diff --git a/harness/src/core/index.ts b/harness/src/core/index.ts index 920e203..b1c8b57 100644 --- a/harness/src/core/index.ts +++ b/harness/src/core/index.ts @@ -1,5 +1,5 @@ -export { createAgentState } from "./state.js"; -export type { AgentState, AgentStateInit } from "./state.js"; +export { createPipelineState } from "./state.js"; +export type { PipelineState, PipelineStateInit } from "./state.js"; export { transition, forceStop, diff --git a/harness/src/core/llm-circuit-breaker.ts b/harness/src/core/llm-circuit-breaker.ts index 5bb5306..63ab7ac 100644 --- a/harness/src/core/llm-circuit-breaker.ts +++ b/harness/src/core/llm-circuit-breaker.ts @@ -7,7 +7,7 @@ import type { LLMToolDefinition, } from "../backends/types.js"; import type { BudgetTracker } from "./budget-tracker.js"; -import type { AgentState } from "./state.js"; +import type { PipelineState } from "./state.js"; import { forceStop } from "./transitions.js"; import type { CompletionStatus } from "../schemas/audit-envelope.js"; @@ -35,7 +35,7 @@ export interface StreamSafetyConfig { export interface CircuitBreakerOptions { backend: LLMBackend; messages: LLMMessage[]; - state: AgentState; + state: PipelineState; budget: BudgetTracker; llmReason?: string; stream?: boolean; diff --git a/harness/src/core/loop-policy.ts b/harness/src/core/loop-policy.ts index 0143bff..3c3d64c 100644 --- a/harness/src/core/loop-policy.ts +++ b/harness/src/core/loop-policy.ts @@ -1,12 +1,12 @@ /** - * Loop policy engine — configurable guardrails for agent loop behavior. + * Loop policy engine — configurable guardrails for cognitive pipeline behavior. * * Policies are checked before each transition. Violations produce critique * steps in the trace rather than throwing, unless the violation is fatal. 
*/ -import type { AgentState } from "./state.js"; -import type { Phase } from "../schemas/agent-loop.js"; +import type { PipelineState } from "./state.js"; +import type { Phase } from "../schemas/cognitive-pipeline.js"; export interface LoopPolicy { maxRetries: number; @@ -41,7 +41,7 @@ export interface PolicyViolation { * Returns an empty array if all checks pass. */ export function checkPolicy( - state: AgentState, + state: PipelineState, proposed: Phase, policy: LoopPolicy, ): PolicyViolation[] { @@ -102,7 +102,7 @@ export function checkPolicy( return violations; } -function hasVerifyStep(state: AgentState): boolean { +function hasVerifyStep(state: PipelineState): boolean { return state.trace.steps.some( (s) => s.type === "verify" || s.type === "critique", ); diff --git a/harness/src/core/state.ts b/harness/src/core/state.ts index 45b8e3d..d69dad5 100644 --- a/harness/src/core/state.ts +++ b/harness/src/core/state.ts @@ -1,12 +1,12 @@ /** - * Agent state — the single mutable object that an agent loop carries through + * Cognitive pipeline state - the single mutable object carried through * every transition. Designed so the full state can be serialized for * checkpointing and replay (RFC 0007). 
*/ import { randomUUID } from "node:crypto"; -import type { Phase } from "../schemas/agent-loop.js"; -import { VALID_TRANSITIONS } from "../schemas/agent-loop.js"; +import type { Phase } from "../schemas/cognitive-pipeline.js"; +import { VALID_TRANSITIONS } from "../schemas/cognitive-pipeline.js"; import type { BudgetPolicy, BudgetSnapshot } from "../schemas/budget.js"; import { createInitialSnapshot, @@ -22,7 +22,7 @@ import type { PermissionGrant } from "../schemas/permission.js"; import type { ToolExecutionReceipt } from "../schemas/receipt.js"; import type { CapabilityManifest } from "../schemas/capability-manifest.js"; -export interface AgentState { +export interface PipelineState { runId: string; objective: string; currentSubtask: string | null; @@ -47,16 +47,16 @@ export interface AgentState { capabilityManifest?: CapabilityManifest; } -export interface AgentStateInit { +export interface PipelineStateInit { objective: string; budgetPolicy?: BudgetPolicy; sandbox?: SandboxConfig; - agentId?: string; + requesterId?: string; } -export function createAgentState(init: AgentStateInit): AgentState { +export function createPipelineState(init: PipelineStateInit): PipelineState { const policy = init.budgetPolicy ?? DEFAULT_BUDGET_POLICY; - const agentId = init.agentId ?? `agent-${randomUUID().slice(0, 8)}`; + const requesterId = init.requesterId ?? 
`cognitive-runtime-${randomUUID().slice(0, 8)}`; return { runId: randomUUID(), objective: init.objective, @@ -77,7 +77,7 @@ export function createAgentState(init: AgentStateInit): AgentState { steps: [], final_answer: "", }, - telemetry: createInitialTelemetry(agentId), + telemetry: createInitialTelemetry(requesterId), delegationRequests: [], delegationDecisions: [], authorityReceipts: [], diff --git a/harness/src/core/trace-emitter.ts b/harness/src/core/trace-emitter.ts index 6a0a65f..bfddcfa 100644 --- a/harness/src/core/trace-emitter.ts +++ b/harness/src/core/trace-emitter.ts @@ -1,5 +1,5 @@ /** - * Trace emitter — structured event logging for every agent action. + * Trace emitter — structured event logging for every cognitive pipeline action. * * Every transition, budget change, tool call, and completion decision appends * a Step to the running trace. The emitter guarantees monotonically increasing @@ -7,7 +7,7 @@ */ import type { Step, StepType, Trace, ToolInvocation } from "../schemas/trace.js"; -import type { AgentState } from "./state.js"; +import type { PipelineState } from "./state.js"; let _counter = 0; @@ -21,7 +21,7 @@ export function resetStepCounter(): void { } export function emitThought( - state: AgentState, + state: PipelineState, content: string, parent?: string, ): Step { @@ -31,7 +31,7 @@ export function emitThought( } export function emitPlan( - state: AgentState, + state: PipelineState, content: string, parent?: string, ): Step { @@ -42,7 +42,7 @@ export function emitPlan( } export function emitAction( - state: AgentState, + state: PipelineState, content: string, toolInvocation: ToolInvocation, parent?: string, @@ -57,7 +57,7 @@ export function emitAction( } export function emitObservation( - state: AgentState, + state: PipelineState, content: string, parentActionId: string, ): Step { @@ -68,7 +68,7 @@ export function emitObservation( } export function emitCritique( - state: AgentState, + state: PipelineState, content: string, parent?: 
string, ): Step { @@ -78,7 +78,7 @@ export function emitCritique( } export function emitVerify( - state: AgentState, + state: PipelineState, content: string, verificationStatus: "verified" | "failed" | "unknown", parent?: string, @@ -92,7 +92,7 @@ export function emitVerify( } export function emitSummary( - state: AgentState, + state: PipelineState, content: string, parent?: string, ): Step { @@ -101,7 +101,7 @@ export function emitSummary( return step; } -export function finalizeTrace(state: AgentState, answer: string): Trace { +export function finalizeTrace(state: PipelineState, answer: string): Trace { state.trace.final_answer = answer; state.trace.termination = state.completionStatus; const startedAt = Date.parse(state.telemetry.observed_at); diff --git a/harness/src/core/transitions.ts b/harness/src/core/transitions.ts index c929885..7d2b655 100644 --- a/harness/src/core/transitions.ts +++ b/harness/src/core/transitions.ts @@ -5,10 +5,10 @@ * structured trace step so the full history is replayable. 
*/ -import type { Phase } from "../schemas/agent-loop.js"; -import { VALID_TRANSITIONS, TERMINAL_PHASES } from "../schemas/agent-loop.js"; +import type { Phase } from "../schemas/cognitive-pipeline.js"; +import { VALID_TRANSITIONS, TERMINAL_PHASES } from "../schemas/cognitive-pipeline.js"; import type { CompletionStatus } from "../schemas/audit-envelope.js"; -import type { AgentState } from "./state.js"; +import type { PipelineState } from "./state.js"; export class InvalidTransitionError extends Error { constructor( @@ -26,7 +26,7 @@ export class InvalidTransitionError extends Error { export class TerminalStateError extends Error { constructor(public readonly phase: Phase) { super( - `Agent is in terminal phase "${phase}" — no further transitions allowed`, + `Cognitive pipeline is in terminal phase "${phase}" — no further transitions allowed`, ); this.name = "TerminalStateError"; } @@ -46,11 +46,11 @@ export function assertTransition(from: Phase, to: Phase): void { } /** - * Transition the agent to a new phase. Mutates state in place and appends a + * Transition the cognitive pipeline to a new phase. Mutates state in place and appends a * trace step documenting the transition. */ export function transition( - state: AgentState, + state: PipelineState, to: Phase, reason: string, ): void { @@ -76,11 +76,11 @@ export function transition( } /** - * Force-stop the agent with a given status. Used for budget exhaustion, + * Force-stop the cognitive pipeline with a given status. Used for budget exhaustion, * safety violations, and external stop signals. 
*/ export function forceStop( - state: AgentState, + state: PipelineState, status: CompletionStatus, reason: string, ): void { diff --git a/harness/src/governance/audit-engine.ts b/harness/src/governance/audit-engine.ts index 79306fa..0b4cf97 100644 --- a/harness/src/governance/audit-engine.ts +++ b/harness/src/governance/audit-engine.ts @@ -5,7 +5,7 @@ import type { DelegationSummary, PermissionSummary, } from "../schemas/audit-envelope.js"; -import type { AgentState } from "../core/state.js"; +import type { PipelineState } from "../core/state.js"; function sha256Hex(input: string): string { return createHash("sha256").update(input, "utf8").digest("hex"); @@ -33,7 +33,7 @@ const ENVELOPE_COMPLETION: ReadonlySet = new Set([ "fail_safe", ]); -function toEnvelopeCompletion(state: AgentState): CompletionStatus { +function toEnvelopeCompletion(state: PipelineState): CompletionStatus { const s = state.completionStatus as string; if (ENVELOPE_COMPLETION.has(s as CompletionStatus)) { return s as CompletionStatus; @@ -48,7 +48,7 @@ function toEnvelopeCompletion(state: AgentState): CompletionStatus { return "fail_safe"; } -function delegationSummaryFromState(state: AgentState): DelegationSummary { +function delegationSummaryFromState(state: PipelineState): DelegationSummary { const decisions = state.delegationDecisions; const grantedCount = decisions.filter( (d) => d.status === "approved" || d.status === "narrowed", @@ -62,7 +62,7 @@ function delegationSummaryFromState(state: AgentState): DelegationSummary { }; } -function permissionSummaryFromState(state: AgentState): PermissionSummary { +function permissionSummaryFromState(state: PipelineState): PermissionSummary { const grants = state.activePermissions; return { total_granted: grants.length, @@ -82,7 +82,7 @@ interface AuditEventOrdering { export interface AuditEvent { event_id: string; run_id: string; - agent_id: string; + requester_id: string; observed_at: string; event_type: string; details: Record; @@ -100,7 +100,7 
@@ export class AuditEngine { emit(args: { run_id: string; - agent_id: string; + requester_id: string; event_type: string; details: Record; }): AuditEvent { @@ -116,7 +116,7 @@ export class AuditEngine { const hashInput = stableStringify({ event_id, run_id: args.run_id, - agent_id: args.agent_id, + requester_id: args.requester_id, observed_at, event_type: args.event_type, details: args.details, @@ -128,7 +128,7 @@ export class AuditEngine { const event: AuditEvent = { event_id, run_id: args.run_id, - agent_id: args.agent_id, + requester_id: args.requester_id, observed_at, event_type: args.event_type, details: args.details, @@ -140,7 +140,7 @@ export class AuditEngine { return event; } - seal(state: AgentState): AuditEnvelope { + seal(state: PipelineState): AuditEnvelope { const completed_at = new Date().toISOString(); const trace_hash = sha256Hex(stableStringify(state.trace)); const task_hash = sha256Hex( @@ -161,7 +161,7 @@ export class AuditEngine { schema_version: "0.3", envelope_id, run_id: state.runId, - agent_id: state.telemetry.agent_id, + requester_id: state.telemetry.requester_id, task_hash, started_at, completed_at, diff --git a/harness/src/governance/manifest-builder.ts b/harness/src/governance/manifest-builder.ts index 1cd8741..8bd95d8 100644 --- a/harness/src/governance/manifest-builder.ts +++ b/harness/src/governance/manifest-builder.ts @@ -16,7 +16,7 @@ import type { ToolContract } from "../schemas/tool-invocation.js"; import type { BudgetSnapshot } from "../schemas/budget.js"; import type { SandboxConfig } from "../schemas/sandbox.js"; import type { PolicySet, PolicyRule } from "./policy-evaluator.js"; -import type { Phase } from "../schemas/agent-loop.js"; +import type { Phase } from "../schemas/cognitive-pipeline.js"; export type WireFormat = "json" | "compact-text" | "toon"; @@ -28,7 +28,7 @@ export interface ManifestToolOverride { export interface ManifestInput { runId: string; - agentId: string; + requesterId: string; phase: Phase; 
toolContracts: ToolContract[]; sandbox: SandboxConfig; @@ -175,7 +175,7 @@ export function buildManifest(input: ManifestInput): CapabilityManifest { return { manifest_id: randomUUID(), run_id: input.runId, - agent_id: input.agentId, + requester_id: input.requesterId, timestamp: new Date().toISOString(), phase: input.phase, tools: { available, blocked }, diff --git a/harness/src/governance/opa-policy-engine.ts b/harness/src/governance/opa-policy-engine.ts index 14a2eae..f6a42ee 100644 --- a/harness/src/governance/opa-policy-engine.ts +++ b/harness/src/governance/opa-policy-engine.ts @@ -49,17 +49,17 @@ export class OpaPolicyEngine implements DelegationPolicyEngine { async evaluate( request: DelegationRequest, - agentId: string, + requesterId: string, ): Promise { try { - const result = await this.queryOpa(request, agentId); - return this.toDecision(request, agentId, result); + const result = await this.queryOpa(request, requesterId); + return this.toDecision(request, requesterId, result); } catch (err) { if (this.config.fallbackEngine) { - return this.config.fallbackEngine.evaluate(request, agentId); + return this.config.fallbackEngine.evaluate(request, requesterId); } const message = err instanceof Error ? 
err.message : String(err); - return createDelegationDecision(request, agentId, { + return createDelegationDecision(request, requesterId, { status: "denied", decidedBy: { kind: "harness" }, policyRefs: [], @@ -75,7 +75,7 @@ export class OpaPolicyEngine implements DelegationPolicyEngine { const decision = await this.evaluate( createSyntheticRequest({ runId: input.runId, - requester: input.agentId, + requester: input.requesterId, scope: { resource: `phase:${input.phase}`, action: "read", @@ -84,7 +84,7 @@ export class OpaPolicyEngine implements DelegationPolicyEngine { intent: `Consult policy hook for phase ${input.phase}`, justification: `Runtime policy consultation at ${input.phase}`, }), - input.agentId, + input.requesterId, ); if (decision.status === "denied" || decision.status === "escalated") { return { @@ -120,7 +120,7 @@ export class OpaPolicyEngine implements DelegationPolicyEngine { const decision = await this.evaluate( createSyntheticRequest({ runId: input.runId, - requester: input.agentId, + requester: input.requesterId, scope: { resource: `tool:${tool.name}`, action: "execute", @@ -129,7 +129,7 @@ export class OpaPolicyEngine implements DelegationPolicyEngine { intent: `Preview tool access for ${tool.name}`, justification: `Manifest compilation for phase ${input.phase}`, }), - input.agentId, + input.requesterId, ); return [tool.name, toToolAccessPreview(decision)] as const; }), @@ -139,7 +139,7 @@ export class OpaPolicyEngine implements DelegationPolicyEngine { private async queryOpa( request: DelegationRequest, - agentId: string, + requesterId: string, ): Promise { const controller = new AbortController(); const timeoutMs = this.config.timeoutMs ?? DEFAULT_TIMEOUT_MS; @@ -158,7 +158,7 @@ export class OpaPolicyEngine implements DelegationPolicyEngine { body: JSON.stringify({ input: { request, - agent_id: agentId, + requester_id: requesterId, context: this.config.inputContext ?? 
{}, }, }), @@ -182,7 +182,7 @@ export class OpaPolicyEngine implements DelegationPolicyEngine { private toDecision( request: DelegationRequest, - agentId: string, + requesterId: string, result: OpaResult, ): DelegationDecision { if (!result.status) { @@ -196,7 +196,7 @@ export class OpaPolicyEngine implements DelegationPolicyEngine { ? result.denial_reason ?? "Denied by OPA policy" : result.denial_reason; - return createDelegationDecision(request, agentId, { + return createDelegationDecision(request, requesterId, { status: result.status, decidedBy, policyRefs, diff --git a/harness/src/governance/policy-engine.ts b/harness/src/governance/policy-engine.ts index 9b1606c..a4bb5ce 100644 --- a/harness/src/governance/policy-engine.ts +++ b/harness/src/governance/policy-engine.ts @@ -6,7 +6,7 @@ import type { DelegationStatus, RequestedScope, } from "../schemas/delegation.js"; -import type { Phase } from "../schemas/agent-loop.js"; +import type { Phase } from "../schemas/cognitive-pipeline.js"; import type { SandboxConfig } from "../schemas/sandbox.js"; import type { ToolContract } from "../schemas/tool-invocation.js"; import { PolicyEvaluator } from "./policy-evaluator.js"; @@ -26,7 +26,7 @@ export interface ToolAccessPreview { export interface ToolAccessPreviewInput { runId: string; - agentId: string; + requesterId: string; objective: string; phase: Phase; tools: ToolContract[]; @@ -36,7 +36,7 @@ export interface ToolAccessPreviewInput { export interface PolicyPhaseConsultationInput { runId: string; - agentId: string; + requesterId: string; objective: string; phase: Phase; context?: Record; @@ -52,7 +52,7 @@ export interface DelegationPolicyEngine { readonly name: string; evaluate( request: DelegationRequest, - agentId: string, + requesterId: string, ): Promise; consultPhase?( input: PolicyPhaseConsultationInput, @@ -91,14 +91,14 @@ function stableStringify(value: unknown): string { export function createDelegationDecision( request: DelegationRequest, - agentId: string, 
+ requesterId: string, draft: DelegationDecisionDraft, ): DelegationDecision { const decidedAt = draft.decidedAt ?? new Date().toISOString(); const policyRefs = draft.policyRefs ?? []; const basis = stableStringify({ request_id: request.request_id, - agent_id: agentId, + requester_id: requesterId, scope: request.requested_scope, status: draft.status, decided_by: draft.decidedBy, @@ -149,9 +149,9 @@ export class InProcessPolicyEngine implements DelegationPolicyEngine { async evaluate( request: DelegationRequest, - agentId: string, + requesterId: string, ): Promise { - return this.evaluator.evaluate(request, agentId); + return this.evaluator.evaluate(request, requesterId); } async consultPhase( @@ -163,7 +163,7 @@ export class InProcessPolicyEngine implements DelegationPolicyEngine { const decision = this.evaluator.evaluate( createSyntheticRequest({ runId: input.runId, - requester: input.agentId, + requester: input.requesterId, intent: `Consult phase ${input.phase}`, justification: `Policy consultation at phase ${input.phase}`, scope: { @@ -172,7 +172,7 @@ export class InProcessPolicyEngine implements DelegationPolicyEngine { constraints: input.context, }, }), - input.agentId, + input.requesterId, ); return toPhaseConsultationDecision(decision); } @@ -193,7 +193,7 @@ export class InProcessPolicyEngine implements DelegationPolicyEngine { const decision = this.evaluator.evaluate( createSyntheticRequest({ runId: input.runId, - requester: input.agentId, + requester: input.requesterId, intent: `Preview tool access for ${tool.name}`, justification: `Manifest compilation for phase ${input.phase}`, scope: { @@ -202,7 +202,7 @@ export class InProcessPolicyEngine implements DelegationPolicyEngine { constraints: input.context, }, }), - input.agentId, + input.requesterId, ); preview[tool.name] = toToolAccessPreview(decision); } diff --git a/harness/src/governance/policy-evaluator.ts b/harness/src/governance/policy-evaluator.ts index d03ba81..7a8fc77 100644 --- 
a/harness/src/governance/policy-evaluator.ts +++ b/harness/src/governance/policy-evaluator.ts @@ -56,14 +56,14 @@ function stableStringify(value: unknown): string { return `{${keys.map((k) => `${JSON.stringify(k)}:${stableStringify(obj[k])}`).join(",")}}`; } -function subjectMatches(agentId: string, pattern: string): boolean { +function subjectMatches(requesterId: string, pattern: string): boolean { if (pattern === "*") { return true; } - if (agentId === pattern) { + if (requesterId === pattern) { return true; } - return agentId.startsWith(`${pattern}:`); + return requesterId.startsWith(`${pattern}:`); } function resourceMatches(scopeResource: string, pattern: string): boolean { @@ -90,12 +90,12 @@ function isWithinHalfOpenWindow( function ruleMatches( rule: PolicyRule, - agentId: string, + requesterId: string, requestedScope: RequestedScope, nowIso: string, ): boolean { const subjectPattern = rule.subject ?? "*"; - if (!subjectMatches(agentId, subjectPattern)) { + if (!subjectMatches(requesterId, subjectPattern)) { return false; } if (!resourceMatches(requestedScope.resource, rule.resource)) { @@ -172,7 +172,7 @@ export class PolicyEvaluator { this.policies = this.policies.filter((p) => p.policy_id !== policyId); } - evaluate(request: DelegationRequest, agent_id: string): DelegationDecision { + evaluate(request: DelegationRequest, requester_id: string): DelegationDecision { const nowIso = new Date().toISOString(); const sorted = [...this.policies] .filter((p) => isWithinHalfOpenWindow(nowIso, p.effective_at, p.expires_at)) @@ -184,7 +184,7 @@ export class PolicyEvaluator { for (const policy of sorted) { const rule = policy.rules.find((r) => - ruleMatches(r, agent_id, request.requested_scope, nowIso), + ruleMatches(r, requester_id, request.requested_scope, nowIso), ); if (!rule) { continue; @@ -193,7 +193,7 @@ export class PolicyEvaluator { if (rule.action === "deny") { return this.buildDecision({ request, - agent_id, + requester_id, status: "denied", decidedBy: { 
kind: "policy", policy_id: policy.policy_id }, policyRefs: [policy.policy_id], @@ -228,7 +228,7 @@ export class PolicyEvaluator { ); return this.buildDecision({ request, - agent_id, + requester_id, status: "narrowed", decidedBy: { kind: "policy", policy_id: firstNarrow.policy.policy_id }, policyRefs: [firstNarrow.policy.policy_id], @@ -244,7 +244,7 @@ export class PolicyEvaluator { if (firstEscalation) { return this.buildDecision({ request, - agent_id, + requester_id, status: "escalated", decidedBy: { kind: "policy", @@ -265,7 +265,7 @@ export class PolicyEvaluator { if (firstAllow) { return this.buildDecision({ request, - agent_id, + requester_id, status: "approved", decidedBy: { kind: "policy", policy_id: firstAllow.policy.policy_id }, policyRefs: [firstAllow.policy.policy_id], @@ -280,7 +280,7 @@ export class PolicyEvaluator { return this.buildDecision({ request, - agent_id, + requester_id, status: "denied", decidedBy: { kind: "harness" }, policyRefs: [], @@ -295,7 +295,7 @@ export class PolicyEvaluator { private buildDecision(args: { request: DelegationRequest; - agent_id: string; + requester_id: string; status: DelegationStatus; decidedBy: DecidedBy; policyRefs: string[]; @@ -308,7 +308,7 @@ export class PolicyEvaluator { }): DelegationDecision { const basis = stableStringify({ request_id: args.request.request_id, - agent_id: args.agent_id, + requester_id: args.requester_id, scope: args.request.requested_scope, status: args.status, decided_by: args.decidedBy, diff --git a/harness/src/index.ts b/harness/src/index.ts index 3fd10a3..a884a21 100644 --- a/harness/src/index.ts +++ b/harness/src/index.ts @@ -1,8 +1,9 @@ /** - * @open-cot/harness — Reference implementation of the Open CoT standard. + * @open-cot/core - Reference package for the Open CoT standard. * * This package proves the standard is executable, testable, and operational - * by implementing agents that emit and consume RFC-compliant traces. 
+ * by exposing cognitive artifact, policy boundary, and reconciliation helpers + * that emit and consume RFC-compliant traces. */ // Schema types @@ -23,5 +24,5 @@ export * from "./governance/index.js"; // Adapters export * from "./adapters/index.js"; -// Agents -export * from "./agents/index.js"; +// Pipelines +export * from "./pipelines/index.js"; diff --git a/harness/src/agents/chat-agent.ts b/harness/src/pipelines/chat-pipeline.ts similarity index 95% rename from harness/src/agents/chat-agent.ts rename to harness/src/pipelines/chat-pipeline.ts index 541d460..d9a4680 100644 --- a/harness/src/agents/chat-agent.ts +++ b/harness/src/pipelines/chat-pipeline.ts @@ -1,11 +1,11 @@ /** - * Chat agent — policy-governed conversational loop with optional tool use. + * Chat cognitive pipeline — policy-governed conversational loop with optional tool use. */ import { randomUUID } from "node:crypto"; import type { LLMBackend, LLMMessage, LLMResponseWithTools } from "../backends/types.js"; -import type { AgentState } from "../core/state.js"; -import { createAgentState } from "../core/state.js"; +import type { PipelineState } from "../core/state.js"; +import { createPipelineState } from "../core/state.js"; import { transition, forceStop } from "../core/transitions.js"; import { createBudgetTracker } from "../core/budget-tracker.js"; import { callLLMWithCircuitBreaker } from "../core/llm-circuit-breaker.js"; @@ -34,9 +34,9 @@ import type { import { InProcessPolicyEngine } from "../governance/policy-engine.js"; import { buildSandboxPolicySets } from "../governance/sandbox-policies.js"; import type { DelegationRequest } from "../schemas/delegation.js"; -import type { Phase } from "../schemas/agent-loop.js"; +import type { Phase } from "../schemas/cognitive-pipeline.js"; -function halted(state: AgentState): boolean { +function halted(state: PipelineState): boolean { return state.phase === "audit_seal"; } @@ -45,7 +45,7 @@ export interface ChatGovernanceOptions { policyEngine?: 
DelegationPolicyEngine; } -export async function runChatAgent( +export async function runChatPipeline( backend: LLMBackend, objective: string, toolRegistry: ToolRegistry, @@ -62,7 +62,7 @@ export async function runChatAgent( const effectivePolicies = governance?.policies ?? defaultPolicies; const policyEngine = governance?.policyEngine ?? new InProcessPolicyEngine(effectivePolicies); - const state = createAgentState({ + const state = createPipelineState({ objective, budgetPolicy, sandbox: sb, @@ -105,7 +105,7 @@ export async function runChatAgent( } const decision = await policyEngine.consultPhase({ runId: state.runId, - agentId: state.telemetry.agent_id, + requesterId: state.telemetry.requester_id, objective, phase, context, @@ -135,7 +135,7 @@ export async function runChatAgent( if (policyEngine.previewToolAccess) { const preview = await policyEngine.previewToolAccess({ runId: state.runId, - agentId: state.telemetry.agent_id, + requesterId: state.telemetry.requester_id, objective, phase, tools: toolContracts, @@ -155,7 +155,7 @@ export async function runChatAgent( } const manifest = buildManifest({ runId: state.runId, - agentId: state.telemetry.agent_id, + requesterId: state.telemetry.requester_id, phase, toolContracts, sandbox: sb, @@ -255,7 +255,7 @@ export async function runChatAgent( const delegationRequest: DelegationRequest = { schema_version: "0.2", request_id: `req-${randomUUID()}`, - requester: state.telemetry.agent_id, + requester: state.telemetry.requester_id, run_id: state.runId, intent: `Use ${tc.toolName} to support objective`, justification: `Model selected tool ${tc.toolName}`, @@ -272,7 +272,7 @@ export async function runChatAgent( transition(state, "validate_authority", "Policy evaluation complete"); const decision = await policyEngine.evaluate( delegationRequest, - state.telemetry.agent_id, + state.telemetry.requester_id, ); budget.recordStep(state, "validate_authority"); if (decision.status === "denied") { diff --git 
a/harness/src/agents/coder-agent.ts b/harness/src/pipelines/coder-pipeline.ts similarity index 95% rename from harness/src/agents/coder-agent.ts rename to harness/src/pipelines/coder-pipeline.ts index 3339d64..a737c0c 100644 --- a/harness/src/agents/coder-agent.ts +++ b/harness/src/pipelines/coder-pipeline.ts @@ -1,11 +1,11 @@ /** - * Coder agent — policy-governed plan / execute / observe / critique with repair loop. + * Coder cognitive pipeline — policy-governed plan / execute / observe / critique with repair loop. */ import { randomUUID } from "node:crypto"; import type { LLMBackend, LLMMessage, LLMResponseWithTools } from "../backends/types.js"; -import type { AgentState } from "../core/state.js"; -import { createAgentState } from "../core/state.js"; +import type { PipelineState } from "../core/state.js"; +import { createPipelineState } from "../core/state.js"; import { transition, forceStop } from "../core/transitions.js"; import { createBudgetTracker } from "../core/budget-tracker.js"; import { callLLMWithCircuitBreaker } from "../core/llm-circuit-breaker.js"; @@ -33,9 +33,9 @@ import type { import { InProcessPolicyEngine } from "../governance/policy-engine.js"; import { buildSandboxPolicySets } from "../governance/sandbox-policies.js"; import type { DelegationRequest } from "../schemas/delegation.js"; -import type { Phase } from "../schemas/agent-loop.js"; +import type { Phase } from "../schemas/cognitive-pipeline.js"; -function halted(state: AgentState): boolean { +function halted(state: PipelineState): boolean { return state.phase === "audit_seal"; } @@ -44,7 +44,7 @@ export interface CoderGovernanceOptions { policyEngine?: DelegationPolicyEngine; } -export async function runCoderAgent( +export async function runCoderPipeline( backend: LLMBackend, objective: string, toolRegistry: ToolRegistry, @@ -62,7 +62,7 @@ export async function runCoderAgent( const effectivePolicies = governance?.policies ?? 
defaultPolicies; const policyEngine = governance?.policyEngine ?? new InProcessPolicyEngine(effectivePolicies); - const state = createAgentState({ + const state = createPipelineState({ objective, budgetPolicy, sandbox: sb, @@ -105,7 +105,7 @@ export async function runCoderAgent( } const decision = await policyEngine.consultPhase({ runId: state.runId, - agentId: state.telemetry.agent_id, + requesterId: state.telemetry.requester_id, objective, phase, context, @@ -135,7 +135,7 @@ export async function runCoderAgent( if (policyEngine.previewToolAccess) { const preview = await policyEngine.previewToolAccess({ runId: state.runId, - agentId: state.telemetry.agent_id, + requesterId: state.telemetry.requester_id, objective, phase, tools: toolContracts, @@ -155,7 +155,7 @@ export async function runCoderAgent( } const manifest = buildManifest({ runId: state.runId, - agentId: state.telemetry.agent_id, + requesterId: state.telemetry.requester_id, phase, toolContracts, sandbox: state.sandbox, @@ -247,7 +247,7 @@ export async function runCoderAgent( const delegationRequest: DelegationRequest = { schema_version: "0.2", request_id: `req-${randomUUID()}`, - requester: state.telemetry.agent_id, + requester: state.telemetry.requester_id, run_id: state.runId, intent: `Use ${tc.toolName} for coding objective`, justification: `Model selected ${tc.toolName}`, @@ -264,7 +264,7 @@ export async function runCoderAgent( transition(state, "validate_authority", "Policy evaluation complete"); const decision = await policyEngine.evaluate( delegationRequest, - state.telemetry.agent_id, + state.telemetry.requester_id, ); budget.recordStep(state, "validate_authority"); if (decision.status === "denied") { diff --git a/harness/src/agents/governed-agent.ts b/harness/src/pipelines/governed-pipeline.ts similarity index 94% rename from harness/src/agents/governed-agent.ts rename to harness/src/pipelines/governed-pipeline.ts index 29e1bac..f629077 100644 --- a/harness/src/agents/governed-agent.ts +++ 
b/harness/src/pipelines/governed-pipeline.ts @@ -1,12 +1,12 @@ /** - * Governed agent — demonstrates the full governed execution FSM with + * Governed cognitive pipeline — demonstrates the full governed execution FSM with * delegation, policy evaluation, permission management, and audit sealing. */ import { createHash, randomUUID } from "node:crypto"; import type { LLMBackend, LLMMessage, LLMResponseWithTools } from "../backends/types.js"; -import type { AgentState } from "../core/state.js"; -import { createAgentState } from "../core/state.js"; +import type { PipelineState } from "../core/state.js"; +import { createPipelineState } from "../core/state.js"; import { transition, forceStop } from "../core/transitions.js"; import { createBudgetTracker } from "../core/budget-tracker.js"; import { callLLMWithCircuitBreaker } from "../core/llm-circuit-breaker.js"; @@ -30,44 +30,44 @@ import type { SandboxConfig } from "../schemas/sandbox.js"; import { buildManifest, serializeManifest } from "../governance/manifest-builder.js"; import type { WireFormat } from "../governance/manifest-builder.js"; import { toLLMToolDefinitions } from "../tools/llm-tools.js"; -import type { Phase } from "../schemas/agent-loop.js"; +import type { Phase } from "../schemas/cognitive-pipeline.js"; function sha256(data: string): string { return createHash("sha256").update(data).digest("hex"); } -function halted(state: AgentState): boolean { +function halted(state: PipelineState): boolean { return state.phase === "audit_seal"; } -export interface GovernedAgentConfig { +export interface GovernedPipelineConfig { objective: string; backend: LLMBackend; toolRegistry: ToolRegistry; policies?: PolicySet[]; policyEngine?: DelegationPolicyEngine; - agentId?: string; + requesterId?: string; budgetPolicy?: BudgetPolicy; sandbox?: SandboxConfig; wireFormat?: WireFormat; } -export interface GovernedAgentResult { +export interface GovernedPipelineResult { trace: Trace; envelope: AuditEnvelope; - state: 
AgentState; + state: PipelineState; } -export async function runGovernedAgent( - config: GovernedAgentConfig, -): Promise<GovernedAgentResult> { +export async function runGovernedPipeline( + config: GovernedPipelineConfig, +): Promise<GovernedPipelineResult> { emit.resetStepCounter(); const budget = createBudgetTracker(); - const state = createAgentState({ + const state = createPipelineState({ objective: config.objective, budgetPolicy: config.budgetPolicy, sandbox: config.sandbox, - agentId: config.agentId, + requesterId: config.requesterId, }); const permissionMgr = new PermissionManager(); @@ -105,7 +105,7 @@ export async function runGovernedAgent( return response; }; - const finish = (answer: string): GovernedAgentResult => { + const finish = (answer: string): GovernedPipelineResult => { permissionMgr.revokeAll("run finalized"); syncPermissionState(); emit.finalizeTrace( @@ -132,7 +132,7 @@ export async function runGovernedAgent( } const decision = await policyEngine.consultPhase({ runId: state.runId, - agentId: state.telemetry.agent_id, + requesterId: state.telemetry.requester_id, objective: config.objective, phase, context, @@ -162,7 +162,7 @@ export async function runGovernedAgent( if (policyEngine.previewToolAccess) { const preview = await policyEngine.previewToolAccess({ runId: state.runId, - agentId: state.telemetry.agent_id, + requesterId: state.telemetry.requester_id, objective: config.objective, phase: state.phase, tools: toolContracts, @@ -184,7 +184,7 @@ export async function runGovernedAgent( } const manifest = buildManifest({ runId: state.runId, - agentId: state.telemetry.agent_id, + requesterId: state.telemetry.requester_id, phase: state.phase, toolContracts, sandbox: state.sandbox, @@ -288,7 +288,7 @@ export async function runGovernedAgent( const request: DelegationRequest = { schema_version: "0.2", request_id: `req-${randomUUID().slice(0, 8)}`, - requester: state.telemetry.agent_id, + requester: state.telemetry.requester_id, run_id: state.runId, intent: `Execute tool ${tc.toolName}`,
justification: planResp.content.slice(0, 500), @@ -307,7 +307,7 @@ export async function runGovernedAgent( transition(state, "validate_authority", `Validate ${tc.toolName}`); const decision = await policyEngine.evaluate( request, - state.telemetry.agent_id, + state.telemetry.requester_id, ); state.delegationDecisions.push(decision); budget.recordStep(state, "validate_authority"); diff --git a/harness/src/pipelines/index.ts b/harness/src/pipelines/index.ts new file mode 100644 index 0000000..a920c88 --- /dev/null +++ b/harness/src/pipelines/index.ts @@ -0,0 +1,7 @@ +export { runChatPipeline } from "./chat-pipeline.js"; +export { runCoderPipeline } from "./coder-pipeline.js"; +export { runGovernedPipeline } from "./governed-pipeline.js"; +export type { ChatGovernanceOptions } from "./chat-pipeline.js"; +export type { CoderGovernanceOptions } from "./coder-pipeline.js"; +export type { GovernedPipelineConfig, GovernedPipelineResult } from "./governed-pipeline.js"; +export type { WireFormat } from "../governance/manifest-builder.js"; diff --git a/harness/src/schemas/audit-envelope.ts b/harness/src/schemas/audit-envelope.ts index 595d5f8..dd40944 100644 --- a/harness/src/schemas/audit-envelope.ts +++ b/harness/src/schemas/audit-envelope.ts @@ -46,7 +46,7 @@ export interface AuditEnvelope { schema_version: "0.2" | "0.3"; envelope_id: string; run_id: string; - agent_id: string; + requester_id: string; task_hash: string; started_at: string; completed_at: string; diff --git a/harness/src/schemas/capability-manifest.ts b/harness/src/schemas/capability-manifest.ts index 7df0513..f514513 100644 --- a/harness/src/schemas/capability-manifest.ts +++ b/harness/src/schemas/capability-manifest.ts @@ -26,7 +26,7 @@ export interface ManifestBudget { export interface CapabilityManifest { manifest_id: string; run_id: string; - agent_id: string; + requester_id: string; timestamp: string; phase: string; tools: { diff --git a/harness/src/schemas/agent-loop.ts 
b/harness/src/schemas/cognitive-pipeline.ts similarity index 94% rename from harness/src/schemas/agent-loop.ts rename to harness/src/schemas/cognitive-pipeline.ts index efcedf7..efd01ea 100644 --- a/harness/src/schemas/agent-loop.ts +++ b/harness/src/schemas/cognitive-pipeline.ts @@ -1,9 +1,9 @@ /** * Governed Execution FSM — RFC 0007 v0.3. * - * Defines the 14-state finite state machine for permission-aware agent + * Defines the 14-state finite state machine for permission-aware cognitive pipeline * execution. Replaces the original 7-phase simple loop with a governed - * model where the model proposes and the harness decides. + * model where cognition proposes and the runtime boundary reconciles. */ export const ALL_PHASES = [ diff --git a/harness/src/schemas/delegation.ts b/harness/src/schemas/delegation.ts index d4742ff..0ba0605 100644 --- a/harness/src/schemas/delegation.ts +++ b/harness/src/schemas/delegation.ts @@ -24,10 +24,10 @@ export interface DelegationProvenance { export interface DelegationRequest { schema_version: "0.2"; request_id: string; - /** Harness-verified agent identity — never model-provided. */ + /** Harness-verified cognitive pipeline identity — never model-provided. */ requester: string; run_id: string; - /** Model-provided: what the agent wants to accomplish. */ + /** Model-provided: what the cognitive pipeline wants to accomplish. */ intent: string; /** Model-provided: why this capability is needed. 
*/ justification: string; diff --git a/harness/src/schemas/index.ts b/harness/src/schemas/index.ts index e7ae25f..26b2e61 100644 --- a/harness/src/schemas/index.ts +++ b/harness/src/schemas/index.ts @@ -1,4 +1,4 @@ -export * from "./agent-loop.js"; +export * from "./cognitive-pipeline.js"; export * from "./budget.js"; export * from "./sandbox.js"; export * from "./telemetry.js"; diff --git a/harness/src/schemas/sandbox.ts b/harness/src/schemas/sandbox.ts index fa4161d..d93841c 100644 --- a/harness/src/schemas/sandbox.ts +++ b/harness/src/schemas/sandbox.ts @@ -1,5 +1,5 @@ /** - * Sandbox / safety types — RFC 0017 (Agent Safety & Sandboxing). + * Sandbox / safety types — RFC 0017 (Cognitive pipeline Safety & Sandboxing). * * Defines the sandbox configuration that policy engines consume at run start. */ diff --git a/harness/src/schemas/telemetry.ts b/harness/src/schemas/telemetry.ts index dd8845b..0c825e4 100644 --- a/harness/src/schemas/telemetry.ts +++ b/harness/src/schemas/telemetry.ts @@ -1,7 +1,7 @@ /** - * Telemetry types — RFC 0031 (Agent Observability & Telemetry). + * Telemetry types — RFC 0031 (Cognitive pipeline Observability & Telemetry). * - * Mirrors schemas/rfc-0031-agent-observability-telemetry.json. + * Mirrors schemas/rfc-0031-cognitive-pipeline-observability-telemetry.json. 
*/ export interface TelemetryMetrics { @@ -20,16 +20,16 @@ export interface TelemetryOrdering { export interface TelemetryRecord { version: string; - agent_id: string; + requester_id: string; observed_at: string; ordering: TelemetryOrdering; metrics: TelemetryMetrics; } -export function createInitialTelemetry(agentId: string): TelemetryRecord { +export function createInitialTelemetry(requesterId: string): TelemetryRecord { return { version: "0.2", - agent_id: agentId, + requester_id: requesterId, observed_at: new Date().toISOString(), ordering: { event_seq: 0, diff --git a/harness/tests/budget.test.ts b/harness/tests/budget.test.ts index 9ccc9d6..1a52602 100644 --- a/harness/tests/budget.test.ts +++ b/harness/tests/budget.test.ts @@ -1,10 +1,10 @@ import { describe, it, expect } from "vitest"; -import { createAgentState } from "../src/core/state.js"; +import { createPipelineState } from "../src/core/state.js"; import { createBudgetTracker } from "../src/core/budget-tracker.js"; describe("BudgetTracker", () => { it("records token usage", () => { - const state = createAgentState({ objective: "budget test" }); + const state = createPipelineState({ objective: "budget test" }); const tracker = createBudgetTracker(); tracker.recordTokens(state, 100, "LLM call"); expect(state.budget.tokensUsed).toBe(100); @@ -12,7 +12,7 @@ describe("BudgetTracker", () => { }); it("records cost", () => { - const state = createAgentState({ objective: "cost test" }); + const state = createPipelineState({ objective: "cost test" }); const tracker = createBudgetTracker(); tracker.recordCost(state, 0.5, "API call"); expect(state.budget.costUsed).toBeCloseTo(0.5); @@ -20,7 +20,7 @@ describe("BudgetTracker", () => { }); it("records steps", () => { - const state = createAgentState({ objective: "step test" }); + const state = createPipelineState({ objective: "step test" }); const tracker = createBudgetTracker(); tracker.recordStep(state, "plan"); tracker.recordStep(state, "act"); @@ -29,7 +29,7 @@ 
describe("BudgetTracker", () => { }); it("records tool calls and updates telemetry", () => { - const state = createAgentState({ objective: "tool test" }); + const state = createPipelineState({ objective: "tool test" }); const tracker = createBudgetTracker(); tracker.recordToolCall(state, "search"); expect(state.budget.toolCallsUsed).toBe(1); @@ -37,7 +37,7 @@ describe("BudgetTracker", () => { }); it("records retries", () => { - const state = createAgentState({ objective: "retry test" }); + const state = createPipelineState({ objective: "retry test" }); const tracker = createBudgetTracker(); tracker.recordRetry(state, "failed verification"); expect(state.budget.retriesUsed).toBe(1); @@ -45,7 +45,7 @@ describe("BudgetTracker", () => { }); it("force-stops when step budget is exhausted with hard enforcement", () => { - const state = createAgentState({ + const state = createPipelineState({ objective: "exhaust test", budgetPolicy: { maxTokens: 100_000, @@ -65,7 +65,7 @@ describe("BudgetTracker", () => { }); it("does NOT force-stop with soft enforcement", () => { - const state = createAgentState({ + const state = createPipelineState({ objective: "soft test", budgetPolicy: { maxTokens: 100_000, @@ -84,7 +84,7 @@ describe("BudgetTracker", () => { }); it("keeps events log", () => { - const state = createAgentState({ objective: "events test" }); + const state = createPipelineState({ objective: "events test" }); const tracker = createBudgetTracker(); tracker.recordTokens(state, 50, "call 1"); tracker.recordTokens(state, 75, "call 2"); @@ -95,7 +95,7 @@ describe("BudgetTracker", () => { }); it("isExhausted returns reason when budget is empty", () => { - const state = createAgentState({ + const state = createPipelineState({ objective: "check test", budgetPolicy: { maxTokens: 100, diff --git a/harness/tests/chat-agent.test.ts b/harness/tests/chat-pipeline.test.ts similarity index 87% rename from harness/tests/chat-agent.test.ts rename to harness/tests/chat-pipeline.test.ts index 
a7a2730..b433772 100644 --- a/harness/tests/chat-agent.test.ts +++ b/harness/tests/chat-pipeline.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect, beforeEach } from "vitest"; -import { runChatAgent } from "../src/agents/chat-agent.js"; +import { runChatPipeline } from "../src/pipelines/chat-pipeline.js"; import { MockLLMBackend } from "../src/backends/mock.js"; import { createMockToolRegistry } from "../src/tools/mock-tools.js"; import { resetStepCounter } from "../src/core/trace-emitter.js"; @@ -9,13 +9,13 @@ import { } from "../src/core/validator.js"; import type { PolicySet } from "../src/governance/policy-evaluator.js"; -describe("ChatAgent (mock backend)", () => { +describe("ChatPipeline (mock backend)", () => { beforeEach(() => { resetStepCounter(); }); it("completes a simple question with a valid trace", async () => { - const trace = await runChatAgent( + const trace = await runChatPipeline( new MockLLMBackend(), "What is 2 + 2? Calculate it.", createMockToolRegistry(), @@ -27,7 +27,7 @@ describe("ChatAgent (mock backend)", () => { }); it("emits valid termination status", async () => { - const trace = await runChatAgent( + const trace = await runChatPipeline( new MockLLMBackend(), "Plan how to organize a project.", createMockToolRegistry(), @@ -38,7 +38,7 @@ describe("ChatAgent (mock backend)", () => { }); it("pairs all actions with observations", async () => { - const trace = await runChatAgent( + const trace = await runChatPipeline( new MockLLMBackend(), "Search for the capital of France.", createMockToolRegistry(), @@ -49,7 +49,7 @@ describe("ChatAgent (mock backend)", () => { }); it("records LLM usage in trace steps", async () => { - const trace = await runChatAgent( + const trace = await runChatPipeline( new MockLLMBackend(), "Tell me about Tokyo.", createMockToolRegistry(), @@ -60,7 +60,7 @@ describe("ChatAgent (mock backend)", () => { }); it("stops on budget exhaustion", async () => { - const trace = await runChatAgent( + const trace = await 
runChatPipeline( new MockLLMBackend(), "Plan a complex analysis.", createMockToolRegistry(), @@ -78,7 +78,7 @@ describe("ChatAgent (mock backend)", () => { }); it("trace steps have unique IDs", async () => { - const trace = await runChatAgent( + const trace = await runChatPipeline( new MockLLMBackend(), "Search for the speed of light.", createMockToolRegistry(), @@ -102,7 +102,7 @@ describe("ChatAgent (mock backend)", () => { }, ], }; - const trace = await runChatAgent( + const trace = await runChatPipeline( new MockLLMBackend(), "Search for weather alerts.", createMockToolRegistry(), diff --git a/harness/tests/coder-agent.test.ts b/harness/tests/coder-pipeline.test.ts similarity index 89% rename from harness/tests/coder-agent.test.ts rename to harness/tests/coder-pipeline.test.ts index 3795883..3ce6acc 100644 --- a/harness/tests/coder-agent.test.ts +++ b/harness/tests/coder-pipeline.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect, beforeEach } from "vitest"; -import { runCoderAgent } from "../src/agents/coder-agent.js"; +import { runCoderPipeline } from "../src/pipelines/coder-pipeline.js"; import { MockLLMBackend } from "../src/backends/mock.js"; import { createMockToolRegistry, @@ -12,14 +12,14 @@ import { } from "../src/core/validator.js"; import type { PolicySet } from "../src/governance/policy-evaluator.js"; -describe("CoderAgent (mock backend)", () => { +describe("CoderPipeline (mock backend)", () => { beforeEach(() => { resetStepCounter(); resetMockFileSystem(); }); it("completes a coding task through full FSM traversal", async () => { - const trace = await runCoderAgent( + const trace = await runCoderPipeline( new MockLLMBackend(), "Read the file src/main.ts and modify it to add a greeting.", createMockToolRegistry(), @@ -31,7 +31,7 @@ describe("CoderAgent (mock backend)", () => { }); it("emits valid termination status", async () => { - const trace = await runCoderAgent( + const trace = await runCoderPipeline( new MockLLMBackend(), "Write a new 
utility function.", createMockToolRegistry(), @@ -42,7 +42,7 @@ describe("CoderAgent (mock backend)", () => { }); it("pairs all actions with observations", async () => { - const trace = await runCoderAgent( + const trace = await runCoderPipeline( new MockLLMBackend(), "Inspect file and make changes.", createMockToolRegistry(), @@ -53,7 +53,7 @@ describe("CoderAgent (mock backend)", () => { }); it("exercises governed FSM phases in the trace", async () => { - const trace = await runCoderAgent( + const trace = await runCoderPipeline( new MockLLMBackend(), "Read src/main.ts, write a fix, then verify the changes.", createMockToolRegistry(), @@ -68,7 +68,7 @@ describe("CoderAgent (mock backend)", () => { }); it("emits a substantive trace", async () => { - const trace = await runCoderAgent( + const trace = await runCoderPipeline( new MockLLMBackend(), "Modify the project files.", createMockToolRegistry(), @@ -79,7 +79,7 @@ describe("CoderAgent (mock backend)", () => { }); it("stops on budget exhaustion mid-task", async () => { - const trace = await runCoderAgent( + const trace = await runCoderPipeline( new MockLLMBackend(), "Do a complex refactoring.", createMockToolRegistry(), @@ -97,7 +97,7 @@ describe("CoderAgent (mock backend)", () => { }); it("respects sandbox tool blocking", async () => { - const trace = await runCoderAgent( + const trace = await runCoderPipeline( new MockLLMBackend(), "Write a file to disk.", createMockToolRegistry(), @@ -132,7 +132,7 @@ describe("CoderAgent (mock backend)", () => { }, ], }; - const trace = await runCoderAgent( + const trace = await runCoderPipeline( new MockLLMBackend(), "Modify src/main.ts to add a greeting.", createMockToolRegistry(), diff --git a/harness/tests/governance.test.ts b/harness/tests/governance.test.ts index b389cd9..71ad97e 100644 --- a/harness/tests/governance.test.ts +++ b/harness/tests/governance.test.ts @@ -5,7 +5,7 @@ import type { PolicySet, PolicyRule } from "../src/governance/policy-evaluator.j import { 
AuthBroker } from "../src/governance/auth-broker.js"; import { AuditEngine } from "../src/governance/audit-engine.js"; import type { DelegationRequest, DelegationDecision } from "../src/schemas/delegation.js"; -import { createAgentState } from "../src/core/state.js"; +import { createPipelineState } from "../src/core/state.js"; const sampleScope = { resource: "tool:search", @@ -16,7 +16,7 @@ function makeRequest(overrides?: Partial): DelegationRequest return { schema_version: "0.2", request_id: "req-test-1", - requester: "agent-test", + requester: "cognitive-pipeline-test", run_id: "run-test", intent: "Search the web", justification: "Need external facts", @@ -30,7 +30,7 @@ describe("PermissionManager", () => { it("grant() creates an active permission", () => { const pm = new PermissionManager(); const grant = pm.grant({ - granted_to: "agent-1", + granted_to: "cognitive-pipeline-1", scope: sampleScope, audience: "tool:search", ttl_seconds: 3600, @@ -49,7 +49,7 @@ describe("PermissionManager", () => { it("consume() marks one-shot permission as consumed", () => { const pm = new PermissionManager(); const grant = pm.grant({ - granted_to: "agent-1", + granted_to: "cognitive-pipeline-1", scope: sampleScope, audience: "tool:search", ttl_seconds: 3600, @@ -64,7 +64,7 @@ describe("PermissionManager", () => { it("consume() rejects non-one-shot permissions", () => { const pm = new PermissionManager(); const grant = pm.grant({ - granted_to: "agent-1", + granted_to: "cognitive-pipeline-1", scope: sampleScope, audience: "tool:search", ttl_seconds: 3600, @@ -79,7 +79,7 @@ describe("PermissionManager", () => { it("isValid() returns false for expired permissions", () => { const pm = new PermissionManager(); const grant = pm.grant({ - granted_to: "agent-1", + granted_to: "cognitive-pipeline-1", scope: sampleScope, audience: "tool:search", ttl_seconds: 0, @@ -94,7 +94,7 @@ describe("PermissionManager", () => { it("revoke() marks permission as revoked", () => { const pm = new 
PermissionManager(); const grant = pm.grant({ - granted_to: "agent-1", + granted_to: "cognitive-pipeline-1", scope: sampleScope, audience: "tool:search", ttl_seconds: 3600, @@ -111,7 +111,7 @@ describe("PermissionManager", () => { it("revokeAll() revokes all active permissions", () => { const pm = new PermissionManager(); const a = pm.grant({ - granted_to: "agent-1", + granted_to: "cognitive-pipeline-1", scope: sampleScope, audience: "tool:search", ttl_seconds: 3600, @@ -120,7 +120,7 @@ describe("PermissionManager", () => { granted_by: "policy:p1", }); const b = pm.grant({ - granted_to: "agent-2", + granted_to: "cognitive-pipeline-2", scope: { resource: "tool:calc", action: "execute" }, audience: "tool:calc", ttl_seconds: 3600, @@ -136,7 +136,7 @@ describe("PermissionManager", () => { it("getEvents() tracks lifecycle events", () => { const pm = new PermissionManager(); const grant = pm.grant({ - granted_to: "agent-1", + granted_to: "cognitive-pipeline-1", scope: sampleScope, audience: "tool:search", ttl_seconds: 3600, @@ -173,7 +173,7 @@ describe("PolicyEvaluator", () => { ], }; ev.addPolicy(policy); - const decision = ev.evaluate(baseRequest(), "agent-1"); + const decision = ev.evaluate(baseRequest(), "cognitive-pipeline-1"); expect(decision.status).toBe("approved"); }); @@ -192,7 +192,7 @@ describe("PolicyEvaluator", () => { }, ], }); - const decision = ev.evaluate(baseRequest(), "agent-1"); + const decision = ev.evaluate(baseRequest(), "cognitive-pipeline-1"); expect(decision.status).toBe("denied"); expect(decision.denial_reason).toBe("Search disabled"); }); @@ -212,7 +212,7 @@ describe("PolicyEvaluator", () => { }, ], }); - const decision = ev.evaluate(baseRequest(), "agent-1"); + const decision = ev.evaluate(baseRequest(), "cognitive-pipeline-1"); expect(decision.status).toBe("narrowed"); expect(decision.narrowed_scope?.constraints?.excluded_fields).toEqual([ "pii", @@ -222,7 +222,7 @@ describe("PolicyEvaluator", () => { it("default deny when no rules match", 
() => { const ev = new PolicyEvaluator(); - const decision = ev.evaluate(baseRequest(), "agent-1"); + const decision = ev.evaluate(baseRequest(), "cognitive-pipeline-1"); expect(decision.status).toBe("denied"); expect(decision.denial_reason).toContain("fail-closed"); }); @@ -241,7 +241,7 @@ describe("PolicyEvaluator", () => { priority: 10, rules: [{ rule_id: "d1", action: "deny", resource: "tool:search", reason: "blocked" }], }); - const decision = ev.evaluate(baseRequest(), "agent-1"); + const decision = ev.evaluate(baseRequest(), "cognitive-pipeline-1"); expect(decision.status).toBe("denied"); expect(decision.denial_reason).toBe("blocked"); }); @@ -254,7 +254,7 @@ describe("PolicyEvaluator", () => { priority: 1, rules: [{ rule_id: "w1", action: "allow", resource: "tool:*" }], }); - const decision = ev.evaluate(baseRequest(), "agent-1"); + const decision = ev.evaluate(baseRequest(), "cognitive-pipeline-1"); expect(decision.status).toBe("approved"); }); }); @@ -334,19 +334,19 @@ describe("AuditEngine", () => { const engine = new AuditEngine(); const e1 = engine.emit({ run_id: "run-1", - agent_id: "agent-1", + requester_id: "cognitive-pipeline-1", event_type: "test.a", details: { n: 1 }, }); const e2 = engine.emit({ run_id: "run-1", - agent_id: "agent-1", + requester_id: "cognitive-pipeline-1", event_type: "test.b", details: { n: 2 }, }); const e3 = engine.emit({ run_id: "run-1", - agent_id: "agent-1", + requester_id: "cognitive-pipeline-1", event_type: "test.c", details: { n: 3 }, }); @@ -361,7 +361,7 @@ describe("AuditEngine", () => { it("seal() produces audit envelope", () => { const engine = new AuditEngine(); - const state = createAgentState({ objective: "demo task" }); + const state = createPipelineState({ objective: "demo task" }); state.trace.steps.push({ id: "s-1", type: "thought", diff --git a/harness/tests/governed-agent.test.ts b/harness/tests/governed-pipeline.test.ts similarity index 89% rename from harness/tests/governed-agent.test.ts rename to 
harness/tests/governed-pipeline.test.ts index 8985d48..1088551 100644 --- a/harness/tests/governed-agent.test.ts +++ b/harness/tests/governed-pipeline.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect, beforeEach } from "vitest"; -import { runGovernedAgent } from "../src/agents/governed-agent.js"; -import type { GovernedAgentConfig } from "../src/agents/governed-agent.js"; +import { runGovernedPipeline } from "../src/pipelines/governed-pipeline.js"; +import type { GovernedPipelineConfig } from "../src/pipelines/governed-pipeline.js"; import { MockLLMBackend } from "../src/backends/mock.js"; import { createMockToolRegistry } from "../src/tools/mock-tools.js"; import { resetStepCounter } from "../src/core/trace-emitter.js"; @@ -15,12 +15,12 @@ const allowAllTools: PolicySet = { rules: [{ rule_id: "allow-tool-star", action: "allow", resource: "tool:*" }], }; -describe("runGovernedAgent (mock backend)", () => { +describe("runGovernedPipeline (mock backend)", () => { beforeEach(() => { resetStepCounter(); }); - function config(overrides: Partial = {}): GovernedAgentConfig { + function config(overrides: Partial = {}): GovernedPipelineConfig { const { policies, ...rest } = overrides; return { objective: "What is 2+2? 
Calculate it.", @@ -32,7 +32,7 @@ describe("runGovernedAgent (mock backend)", () => { } it("runs through governed FSM with tool calls", async () => { - const result = await runGovernedAgent(config()); + const result = await runGovernedPipeline(config()); const hasDelegationSignal = result.trace.steps.some( (s) => @@ -50,7 +50,7 @@ describe("runGovernedAgent (mock backend)", () => { }); it("handles no-tool reasoning", async () => { - const result = await runGovernedAgent( + const result = await runGovernedPipeline( config({ objective: "Summarize the benefits of clear documentation.", }), @@ -82,7 +82,7 @@ describe("runGovernedAgent (mock backend)", () => { ], }; - const result = await runGovernedAgent( + const result = await runGovernedPipeline( config({ objective: "Search for open source libraries.", policies: [denySearch, allowAllTools], @@ -114,7 +114,7 @@ describe("runGovernedAgent (mock backend)", () => { ], }; - const result = await runGovernedAgent( + const result = await runGovernedPipeline( config({ objective: "Search for open source design patterns.", policies: [narrowSearch], @@ -130,14 +130,14 @@ describe("runGovernedAgent (mock backend)", () => { }); it("produces valid audit envelope", async () => { - const result = await runGovernedAgent(config()); + const result = await runGovernedPipeline(config()); expect(result.envelope.trace_hash).toMatch(/^[0-9a-f]{64}$/); expect(result.envelope.integrity.content_hash).toMatch(/^[0-9a-f]{64}$/); expect(AuditEngine.verify(result.envelope)).toBe(true); }); it("tracks delegation requests in state", async () => { - const result = await runGovernedAgent(config()); + const result = await runGovernedPipeline(config()); expect(result.state.delegationRequests.length).toBeGreaterThan(0); expect(result.state.delegationDecisions.length).toBe( result.state.delegationRequests.length, @@ -171,7 +171,7 @@ describe("runGovernedAgent (mock backend)", () => { ], }; - const result = await runGovernedAgent( + const result = await 
runGovernedPipeline( config({ objective: "Search records.", backend, @@ -206,7 +206,7 @@ describe("runGovernedAgent (mock backend)", () => { ], }; - const result = await runGovernedAgent( + const result = await runGovernedPipeline( config({ objective: "Explain this architecture in one paragraph.", policies: [denyFinalize, allowAllTools], diff --git a/harness/tests/llm-circuit-breaker.test.ts b/harness/tests/llm-circuit-breaker.test.ts index 6fbb9a7..9b89208 100644 --- a/harness/tests/llm-circuit-breaker.test.ts +++ b/harness/tests/llm-circuit-breaker.test.ts @@ -1,7 +1,7 @@ import { describe, it, expect } from "vitest"; import { MockLLMBackend } from "../src/backends/mock.js"; import type { BudgetPolicy } from "../src/schemas/budget.js"; -import { createAgentState } from "../src/core/state.js"; +import { createPipelineState } from "../src/core/state.js"; import { createBudgetTracker } from "../src/core/budget-tracker.js"; import { callLLMWithCircuitBreaker } from "../src/core/llm-circuit-breaker.js"; @@ -18,7 +18,7 @@ function makePolicy(maxTokens: number): BudgetPolicy { describe("callLLMWithCircuitBreaker", () => { it("stops before decode when prompt estimate exceeds remaining budget", async () => { - const state = createAgentState({ + const state = createPipelineState({ objective: "tiny budget", budgetPolicy: makePolicy(8), }); @@ -42,7 +42,7 @@ describe("callLLMWithCircuitBreaker", () => { }); it("interrupts streamed decoding when completion allowance is exhausted", async () => { - const state = createAgentState({ + const state = createPipelineState({ objective: "mid-stream budget stop", budgetPolicy: makePolicy(90), }); @@ -73,7 +73,7 @@ describe("callLLMWithCircuitBreaker", () => { }); it("enters fail_safe when streamed output exceeds safety ceiling", async () => { - const state = createAgentState({ + const state = createPipelineState({ objective: "safety ceiling stop", budgetPolicy: makePolicy(500), }); @@ -102,7 +102,7 @@ 
describe("callLLMWithCircuitBreaker", () => { }); it("enters fail_safe when blocked stream pattern appears", async () => { - const state = createAgentState({ + const state = createPipelineState({ objective: "pattern stop", budgetPolicy: makePolicy(500), }); diff --git a/harness/tests/manifest.test.ts b/harness/tests/manifest.test.ts index 5ea814c..b513e41 100644 --- a/harness/tests/manifest.test.ts +++ b/harness/tests/manifest.test.ts @@ -52,7 +52,7 @@ describe("buildManifest", () => { it("classifies sandbox-allowed tools as pre_authorized", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: allTools, sandbox: DEFAULT_SANDBOX_CONFIG, @@ -75,7 +75,7 @@ describe("buildManifest", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: allTools, sandbox, @@ -105,7 +105,7 @@ describe("buildManifest", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: allTools, sandbox: DEFAULT_SANDBOX_CONFIG, @@ -135,7 +135,7 @@ describe("buildManifest", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: allTools, sandbox: DEFAULT_SANDBOX_CONFIG, @@ -150,7 +150,7 @@ describe("buildManifest", () => { it("includes budget snapshot", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: allTools, sandbox: DEFAULT_SANDBOX_CONFIG, @@ -166,7 +166,7 @@ describe("buildManifest", () => { it("uses provided trust level", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: allTools, sandbox: DEFAULT_SANDBOX_CONFIG, @@ -181,7 +181,7 
@@ describe("buildManifest", () => { it("stores phase and IDs correctly", () => { const manifest = buildManifest({ runId: "run-42", - agentId: "agent-planner", + requesterId: "cognitive-pipeline-planner", phase: "critique_verify", toolContracts: allTools, sandbox: DEFAULT_SANDBOX_CONFIG, @@ -190,7 +190,7 @@ describe("buildManifest", () => { }); expect(manifest.run_id).toBe("run-42"); - expect(manifest.agent_id).toBe("agent-planner"); + expect(manifest.requester_id).toBe("cognitive-pipeline-planner"); expect(manifest.phase).toBe("critique_verify"); expect(manifest.manifest_id).toBeTruthy(); expect(manifest.timestamp).toBeTruthy(); @@ -199,7 +199,7 @@ describe("buildManifest", () => { it("applies tool overrides from policy engine previews", () => { const manifest = buildManifest({ runId: "run-override", - agentId: "agent-preview", + requesterId: "cognitive-pipeline-preview", phase: "plan", toolContracts: [searchContract, calcContract], sandbox: DEFAULT_SANDBOX_CONFIG, @@ -231,7 +231,7 @@ describe("manifestToCompactText", () => { it("produces readable text with markers", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: [searchContract, calcContract], sandbox: DEFAULT_SANDBOX_CONFIG, @@ -258,7 +258,7 @@ describe("manifestToCompactText", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: allTools, sandbox, @@ -287,7 +287,7 @@ describe("manifestToCompactText", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: [searchContract], sandbox: DEFAULT_SANDBOX_CONFIG, @@ -303,7 +303,7 @@ describe("manifestToCompactText", () => { it("compact text is reasonably token-efficient", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: 
"cognitive-pipeline-1", phase: "frame", toolContracts: allTools, sandbox: { ...DEFAULT_SANDBOX_CONFIG, blockedTools: ["shell"] }, diff --git a/harness/tests/policy-engine-conformance.test.ts b/harness/tests/policy-engine-conformance.test.ts index dae0e1b..5410a56 100644 --- a/harness/tests/policy-engine-conformance.test.ts +++ b/harness/tests/policy-engine-conformance.test.ts @@ -42,7 +42,7 @@ function makeRequest(overrides?: Partial): DelegationRequest const base: DelegationRequest = { schema_version: "0.2", request_id: "req-conformance", - requester: "agent-conformance", + requester: "cognitive-pipeline-conformance", run_id: "run-conformance", intent: "Conformance policy check", justification: "Verify OPA decision mapping contract", @@ -87,7 +87,7 @@ describe("OPA decision conformance fixtures", () => { policyPath: "open_cot/delegation", }); const request = makeRequest(fixtureCase.request); - const decision = await engine.evaluate(request, "agent-1"); + const decision = await engine.evaluate(request, "cognitive-pipeline-1"); expect(decision.request_id).toBe(request.request_id); expect(decision.decision_id).toMatch(/^[0-9a-f]{64}$/); diff --git a/harness/tests/policy-engine-live.test.ts b/harness/tests/policy-engine-live.test.ts index c2bbfdb..cf8e892 100644 --- a/harness/tests/policy-engine-live.test.ts +++ b/harness/tests/policy-engine-live.test.ts @@ -6,7 +6,7 @@ function makeRequest(overrides?: Partial): DelegationRequest return { schema_version: "0.2", request_id: "req-live-opa", - requester: "agent-live", + requester: "cognitive-pipeline-live", run_id: "run-live", intent: "Live OPA integration check", justification: "Validate runtime OPA decision mapping", @@ -48,7 +48,7 @@ describeLive("OpaPolicyEngine live integration", () => { }, }); const request = makeRequest(); - const decision = await engine.evaluate(request, "agent-live-01"); + const decision = await engine.evaluate(request, "cognitive-pipeline-live-01"); 
expect(decision.request_id).toBe(request.request_id); expect(decision.decision_id).toMatch(/^[0-9a-f]{64}$/); diff --git a/harness/tests/policy-engine.test.ts b/harness/tests/policy-engine.test.ts index bed4a5d..8b28a1c 100644 --- a/harness/tests/policy-engine.test.ts +++ b/harness/tests/policy-engine.test.ts @@ -12,7 +12,7 @@ function makeRequest(overrides?: Partial): DelegationRequest return { schema_version: "0.2", request_id: "req-policy-test", - requester: "agent-test", + requester: "cognitive-pipeline-test", run_id: "run-test", intent: "Search for data", justification: "Need external evidence", @@ -46,7 +46,7 @@ describe("InProcessPolicyEngine", () => { }; const engine = new InProcessPolicyEngine([allowSearch]); - const decision = await engine.evaluate(makeRequest(), "agent-1"); + const decision = await engine.evaluate(makeRequest(), "cognitive-pipeline-1"); expect(decision.status).toBe("approved"); expect(decision.policy_refs).toEqual(["allow-search"]); }); @@ -55,7 +55,7 @@ describe("InProcessPolicyEngine", () => { const engine = new InProcessPolicyEngine([]); const decision = await engine.consultPhase?.({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", objective: "Summarize docs", phase: "frame", }); @@ -82,7 +82,7 @@ describe("InProcessPolicyEngine", () => { const engine = new InProcessPolicyEngine([denyFinalize]); const decision = await engine.consultPhase?.({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", objective: "Complete task", phase: "finalize", }); @@ -112,7 +112,7 @@ describe("InProcessPolicyEngine", () => { const engine = new InProcessPolicyEngine([policy]); const preview = await engine.previewToolAccess?.({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", objective: "Research and compute", phase: "plan", tools: [ @@ -190,7 +190,7 @@ describe("OpaPolicyEngine", () => { baseUrl: "https://opa.example", policyPath: "open_cot/delegation", }); - const decision = 
await engine.evaluate(makeRequest(), "agent-1"); + const decision = await engine.evaluate(makeRequest(), "cognitive-pipeline-1"); expect(decision.status).toBe("narrowed"); expect(decision.policy_refs).toEqual(["opa.search.policy"]); @@ -199,7 +199,7 @@ describe("OpaPolicyEngine", () => { const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit]; expect(url).toBe("https://opa.example/v1/data/open_cot/delegation"); const body = JSON.parse(String(init.body)); - expect(body.input.agent_id).toBe("agent-1"); + expect(body.input.requester_id).toBe("cognitive-pipeline-1"); expect(body.input.request.request_id).toBe("req-policy-test"); }); @@ -216,7 +216,7 @@ describe("OpaPolicyEngine", () => { baseUrl: "https://opa.example", policyPath: "open_cot/delegation", }); - const decision = await engine.evaluate(makeRequest(), "agent-1"); + const decision = await engine.evaluate(makeRequest(), "cognitive-pipeline-1"); expect(decision.status).toBe("denied"); expect(decision.denial_reason).toContain("OPA policy evaluation failed"); @@ -228,8 +228,8 @@ describe("OpaPolicyEngine", () => { const fallback: DelegationPolicyEngine = { name: "fallback", - evaluate: async (request, agentId) => - createDelegationDecision(request, agentId, { + evaluate: async (request, requesterId) => + createDelegationDecision(request, requesterId, { status: "approved", decidedBy: { kind: "harness" }, policyRefs: ["fallback-policy"], @@ -242,7 +242,7 @@ describe("OpaPolicyEngine", () => { policyPath: "open_cot/delegation", fallbackEngine: fallback, }); - const decision = await engine.evaluate(makeRequest(), "agent-1"); + const decision = await engine.evaluate(makeRequest(), "cognitive-pipeline-1"); expect(decision.status).toBe("approved"); expect(decision.policy_refs).toEqual(["fallback-policy"]); @@ -270,7 +270,7 @@ describe("OpaPolicyEngine", () => { policyPath: "open_cot/delegation", inputContext: { policy_mode: "deny", request_source: "demo" }, }); - await engine.evaluate(makeRequest(), 
"agent-1"); + await engine.evaluate(makeRequest(), "cognitive-pipeline-1"); const [, init] = fetchMock.mock.calls[0] as [string, RequestInit]; const body = JSON.parse(String(init.body)); diff --git a/harness/tests/toon-adapter.test.ts b/harness/tests/toon-adapter.test.ts index 9e8afcc..7bb78a3 100644 --- a/harness/tests/toon-adapter.test.ts +++ b/harness/tests/toon-adapter.test.ts @@ -302,7 +302,7 @@ describe("manifestToToon", () => { it("produces TOON output with markers", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: [searchContract, calcContract, writeContract], sandbox: DEFAULT_SANDBOX_CONFIG, @@ -330,7 +330,7 @@ describe("manifestToToon", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: allTools, sandbox, @@ -345,7 +345,7 @@ describe("manifestToToon", () => { it("is more compact than JSON", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: allTools, sandbox: { ...DEFAULT_SANDBOX_CONFIG, blockedTools: ["shell"] }, @@ -361,7 +361,7 @@ describe("manifestToToon", () => { it("TOON word count stays under 100 for typical manifest", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: allTools, sandbox: { ...DEFAULT_SANDBOX_CONFIG, blockedTools: ["shell"] }, @@ -380,7 +380,7 @@ describe("manifestToToon", () => { describe("serializeManifest", () => { const manifest = buildManifest({ runId: "run-1", - agentId: "agent-1", + requesterId: "cognitive-pipeline-1", phase: "frame", toolContracts: [searchContract, calcContract], sandbox: DEFAULT_SANDBOX_CONFIG, diff --git a/harness/tests/trace-emitter.test.ts b/harness/tests/trace-emitter.test.ts index fae7cf1..aab97cd 100644 --- 
a/harness/tests/trace-emitter.test.ts +++ b/harness/tests/trace-emitter.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect, beforeEach } from "vitest"; -import { createAgentState } from "../src/core/state.js"; +import { createPipelineState } from "../src/core/state.js"; import { emitThought, emitPlan, @@ -18,7 +18,7 @@ describe("TraceEmitter", () => { }); it("emitThought appends a thought step", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); const step = emitThought(state, "thinking..."); expect(step.type).toBe("thought"); expect(step.content).toBe("thinking..."); @@ -26,7 +26,7 @@ describe("TraceEmitter", () => { }); it("emitPlan increments planVersion", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); expect(state.planVersion).toBe(0); emitPlan(state, "plan v1"); expect(state.planVersion).toBe(1); @@ -35,7 +35,7 @@ describe("TraceEmitter", () => { }); it("emitAction records tool invocation and sets lastAction", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); const step = emitAction(state, "call:search", { tool_name: "search", arguments: { query: "test" }, @@ -47,7 +47,7 @@ describe("TraceEmitter", () => { }); it("emitObservation sets parent and adds to evidence", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); const action = emitAction(state, "call:search", { tool_name: "search", arguments: {}, @@ -59,25 +59,25 @@ describe("TraceEmitter", () => { }); it("emitCritique adds a critique step", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); const step = emitCritique(state, "something is wrong"); expect(step.type).toBe("critique"); }); it("emitVerify sets verification_status", () => { - 
const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); const step = emitVerify(state, "looks good", "verified"); expect(step.verification_status).toBe("verified"); }); it("emitSummary adds a summarize step", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); const step = emitSummary(state, "done with everything"); expect(step.type).toBe("summarize"); }); it("finalizeTrace sets final_answer and termination", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); state.completionStatus = "succeeded"; const trace = finalizeTrace(state, "42"); expect(trace.final_answer).toBe("42"); @@ -85,7 +85,7 @@ describe("TraceEmitter", () => { }); it("step IDs are unique and monotonically increasing", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); const s1 = emitThought(state, "a"); const s2 = emitThought(state, "b"); const s3 = emitThought(state, "c"); diff --git a/harness/tests/transitions.test.ts b/harness/tests/transitions.test.ts index b2c954f..f3c6b12 100644 --- a/harness/tests/transitions.test.ts +++ b/harness/tests/transitions.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from "vitest"; -import { VALID_TRANSITIONS, ALL_PHASES } from "../src/schemas/agent-loop.js"; +import { VALID_TRANSITIONS, ALL_PHASES } from "../src/schemas/cognitive-pipeline.js"; import { canTransition, assertTransition, @@ -7,7 +7,7 @@ import { forceStop, InvalidTransitionError, TerminalStateError, - createAgentState, + createPipelineState, } from "../src/core/index.js"; describe("FSM transitions", () => { @@ -51,7 +51,7 @@ describe("FSM transitions", () => { describe("transition()", () => { it("moves state to the target phase", () => { - const state = createAgentState({ objective: "test" }); + const state = 
createPipelineState({ objective: "test" }); expect(state.phase).toBe("receive"); transition(state, "frame", "start"); transition(state, "plan", "ready"); @@ -60,7 +60,7 @@ describe("FSM transitions", () => { }); it("appends a trace step", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); const before = state.trace.steps.length; transition(state, "frame", "starting work"); expect(state.trace.steps.length).toBe(before + 1); @@ -68,13 +68,13 @@ describe("FSM transitions", () => { }); it("updates nextAllowedPhases", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); transition(state, "frame", "go"); expect(state.nextAllowedPhases).toEqual(expect.arrayContaining(["plan"])); }); it("sets completionStatus to succeeded when sealing normally", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); transition(state, "frame", "go"); transition(state, "plan", "go"); transition(state, "finalize", "wrap"); @@ -86,7 +86,7 @@ describe("FSM transitions", () => { describe("forceStop()", () => { it("forces state to audit_seal with the given status", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); forceStop(state, "budget_exhausted", "tokens ran out"); expect(state.phase).toBe("audit_seal"); expect(state.completionStatus).toBe("budget_exhausted"); @@ -94,7 +94,7 @@ describe("FSM transitions", () => { }); it("is idempotent on already-stopped state", () => { - const state = createAgentState({ objective: "test" }); + const state = createPipelineState({ objective: "test" }); forceStop(state, "external_stop", "user abort"); const stepCount = state.trace.steps.length; forceStop(state, "failed", "double stop"); @@ -105,9 +105,9 @@ describe("FSM transitions", () => { describe("full FSM path", () => { 
it("supports receive -> frame -> plan -> finalize -> audit_seal", () => { - const state = createAgentState({ objective: "full path" }); + const state = createPipelineState({ objective: "full path" }); transition(state, "frame", "gather context"); - transition(state, "plan", "decide"); + transition(state, "plan", "reconcile"); transition(state, "finalize", "wrap up"); transition(state, "audit_seal", "done"); expect(state.phase).toBe("audit_seal"); @@ -115,7 +115,7 @@ describe("FSM transitions", () => { }); it("supports critique_verify -> plan -> execute_tool -> observe_result loop", () => { - const state = createAgentState({ objective: "repair loop" }); + const state = createPipelineState({ objective: "repair loop" }); transition(state, "frame", "f"); transition(state, "plan", "p"); transition(state, "execute_tool", "run"); diff --git a/reference/python/LOOP_CONTRACT.md b/reference/python/LOOP_CONTRACT.md index fbe6d91..5db6c5a 100644 --- a/reference/python/LOOP_CONTRACT.md +++ b/reference/python/LOOP_CONTRACT.md @@ -1,6 +1,6 @@ # Minimal Loop Transcript Contract (RFC 0007 aligned) -This contract defines the minimum shape for a deterministic agent-loop transcript used in 0.1 tests. +This contract defines the minimum shape for a deterministic cognitive-pipeline transcript used in 0.1 tests. 
## Required top-level fields diff --git a/reference/python/__init__.py b/reference/python/__init__.py index 81e3a04..1378ac1 100644 --- a/reference/python/__init__.py +++ b/reference/python/__init__.py @@ -1,6 +1,6 @@ """Reference Python package for Open CoT.""" -from .agent_loop_runner import run_mock_agent_loop +from .cognitive_pipeline_runner import run_mock_cognitive_pipeline from .generator import empty_trace from .parser import iter_traces_jsonl, parse_trace from .validator import load_schema, validate_trace @@ -10,6 +10,6 @@ "iter_traces_jsonl", "load_schema", "parse_trace", - "run_mock_agent_loop", + "run_mock_cognitive_pipeline", "validate_trace", ] diff --git a/reference/python/agent_loop_runner.py b/reference/python/cognitive_pipeline_runner.py similarity index 92% rename from reference/python/agent_loop_runner.py rename to reference/python/cognitive_pipeline_runner.py index a454ee0..a635569 100644 --- a/reference/python/agent_loop_runner.py +++ b/reference/python/cognitive_pipeline_runner.py @@ -1,4 +1,4 @@ -"""Deterministic mock agent loop runner.""" +"""Deterministic mock cognitive pipeline runner.""" from __future__ import annotations @@ -10,7 +10,7 @@ from .mock_verifier import MockVerifier -def run_mock_agent_loop(task: str) -> tuple[dict[str, Any], dict[str, Any]]: +def run_mock_cognitive_pipeline(task: str) -> tuple[dict[str, Any], dict[str, Any]]: llm = MockLLM() tools = MockToolbox() verifier = MockVerifier() diff --git a/reference/python/mock_llm.py b/reference/python/mock_llm.py index 3750498..6dd030e 100644 --- a/reference/python/mock_llm.py +++ b/reference/python/mock_llm.py @@ -1,4 +1,4 @@ -"""Deterministic mock LLM for agent loop integration tests.""" +"""Deterministic mock LLM for cognitive pipeline integration tests.""" from __future__ import annotations diff --git a/rfcs/0001-initial-schema.md b/rfcs/0001-initial-schema.md index d1b5654..b1a529b 100644 --- a/rfcs/0001-initial-schema.md +++ b/rfcs/0001-initial-schema.md @@ -16,7 +16,7 
@@ It provides a minimal core for: - tool-augmented reasoning - verifier sidecars - benchmark and training datasets -- deterministic replay in agent loops +- deterministic replay in cognitive pipelines --- @@ -32,7 +32,7 @@ It provides a minimal core for: ### 2.2 Non-goals - Defining a specific training recipe. -- Requiring one universal agent runtime. +- Requiring one universal cognitive pipeline runtime. - Standardizing hidden model internals. --- @@ -122,7 +122,7 @@ Each step may include: ### 6.1 Scope expansion fields -- **Decision:** Keep token timing, model metadata, RL rewards, and multi-agent fields optional and out of the required core object. +- **Decision:** Keep token timing, model metadata, RL rewards, and multi-party fields optional and out of the required core object. - **Rationale:** Preserves broad interoperability and avoids forcing runtime-specific internals. - **Normative requirement:** Core traces **MUST** validate with only fields in this RFC. Additional fields **MAY** be attached as extensions or linked sidecars. - **Migration note:** Future versions can promote extensions to first-class fields only with a major compatibility review. @@ -150,7 +150,7 @@ This RFC is accepted when: - At least 3 maintainers approve it. - A reference validator passes against the schema. - At least one example dataset uses this format. -- At least one agent loop implementation emits schema-valid traces. +- At least one cognitive pipeline implementation emits schema-valid traces. --- diff --git a/rfcs/0002-verifier-output-schema.md b/rfcs/0002-verifier-output-schema.md index ae2862d..f62e68b 100644 --- a/rfcs/0002-verifier-output-schema.md +++ b/rfcs/0002-verifier-output-schema.md @@ -36,7 +36,7 @@ Modern reasoning models increasingly rely on **verifiable intermediate steps**: - Qwen‑R1 uses step‑level reward shaping. - “Let’s Verify Step by Step” shows verification dramatically improves accuracy. - Math and logic tasks require symbolic correctness. 
-- Agent frameworks need to validate tool outputs and reasoning transitions. +- Cognitive pipeline frameworks need to validate tool outputs and reasoning transitions. However, there is **no open standard** for representing verifier outputs. diff --git a/rfcs/0003-tool-invocation-schema.md b/rfcs/0003-tool-invocation-schema.md index df533e8..86e019b 100644 --- a/rfcs/0003-tool-invocation-schema.md +++ b/rfcs/0003-tool-invocation-schema.md @@ -1,31 +1,31 @@ # RFC 0003 — Tool Invocation Schema (v0.1) -**Status:** Draft -**Author:** Byron / Open CoT Community -**Created:** 2026‑04‑14 -**Target Version:** Schema v0.1 +**Status:** Draft +**Author:** Byron / Open CoT Community +**Created:** 2026‑04‑14 +**Target Version:** Schema v0.1 **Discussion:** https://github.com/supernovae/open-cot/discussions/3 --- ## 1. Summary -This RFC proposes the **Tool Invocation Schema**, a structured, model‑agnostic format for representing tool calls within reasoning traces. +This RFC proposes the **Tool Invocation Schema**, a structured, model‑agnostic format for representing tool calls within reasoning traces. 
It extends RFC 0001 (Reasoning Schema) by defining how LLMs should express: -- tool selection -- tool arguments -- tool execution -- tool outputs -- error handling -- integration with reasoning steps +- tool selection +- tool arguments +- tool execution +- tool outputs +- error handling +- integration with reasoning steps This schema is compatible with: -- **ReAct** (Thought → Action → Observation) -- **OpenAI function calling** -- **JSON‑based tool APIs** -- **LangChain / LangGraph tool nodes** -- **agentic coding frameworks** -- **R1‑style RL training with tool feedback** +- **ReAct** (Thought → Action → Observation) +- **OpenAI function calling** +- **JSON‑based tool APIs** +- **LangChain / LangGraph tool nodes** +- **agentic coding frameworks** +- **R1‑style RL training with tool feedback** --- @@ -33,18 +33,18 @@ This schema is compatible with: Tool use is now a core part of modern LLM reasoning: -- ReAct introduced interleaved reasoning + actions. -- Agent frameworks rely on structured tool calls. -- Coding agents require deterministic tool invocation formats. -- RL‑trained reasoning models use tool feedback as reward signals. +- ReAct introduced interleaved reasoning + actions. +- Cognitive pipeline frameworks rely on structured tool calls. +- Coding pipelines require deterministic tool invocation formats. +- RL‑trained reasoning models use tool feedback as reward signals. - Multi‑step planning requires consistent action/observation structure. However: -- Every framework uses a different tool schema. -- Tool calls are often embedded in unstructured text. -- Observations are inconsistently formatted. -- Error handling is ad‑hoc. +- Every framework uses a different tool schema. +- Tool calls are often embedded in unstructured text. +- Observations are inconsistently formatted. +- Error handling is ad‑hoc. - No open standard exists for tool invocation within reasoning traces. This RFC defines a **unified, interoperable, verifiable** schema for tool use. 
@@ -54,18 +54,18 @@ This RFC defines a **unified, interoperable, verifiable** schema for tool use. ## 3. Design Goals ### 3.1 Must‑Have Goals -- **Compatible with RFC 0001** (step IDs, structure). -- **Supports ReAct** (thought → action → observation). -- **Supports JSON‑based tool APIs**. -- **Supports deterministic parsing**. -- **Supports error reporting**. -- **Supports multi‑tool workflows**. -- **Extensible** for future agent frameworks. +- **Compatible with RFC 0001** (step IDs, structure). +- **Supports ReAct** (thought → action → observation). +- **Supports JSON‑based tool APIs**. +- **Supports deterministic parsing**. +- **Supports error reporting**. +- **Supports multi‑tool workflows**. +- **Extensible** for future cognitive pipeline frameworks. ### 3.2 Non‑Goals -- Defining tool semantics. -- Defining a universal tool registry. -- Enforcing a specific agent loop. +- Defining tool semantics. +- Defining a universal tool registry. +- Enforcing a specific cognitive pipeline. - Encoding proprietary tool metadata. --- @@ -74,17 +74,17 @@ This RFC defines a **unified, interoperable, verifiable** schema for tool use. A tool invocation consists of: -1. **Action Step** - - specifies the tool - - includes arguments - - references the reasoning step that triggered it +1. **Action Step** + - specifies the tool + - includes arguments + - references the reasoning step that triggered it -2. **Observation Step** - - contains the tool output - - may include structured or unstructured results - - may include error information +2. **Observation Step** + - contains the tool output + - may include structured or unstructured results + - may include error information -This mirrors ReAct and modern agent frameworks. +This mirrors ReAct and modern cognitive pipeline frameworks. --- @@ -265,5 +265,5 @@ This RFC will be accepted when: At least 3 maintainers approve it. A reference implementation can parse and validate tool invocations. -At least one agent loop emits this schema. 
+At least one cognitive pipeline emits this schema. At least one dataset includes tool‑augmented reasoning traces. \ No newline at end of file diff --git a/rfcs/0004-branching-reasoning-schema.md b/rfcs/0004-branching-reasoning-schema.md index 95c4ed1..a84226d 100644 --- a/rfcs/0004-branching-reasoning-schema.md +++ b/rfcs/0004-branching-reasoning-schema.md @@ -1,23 +1,23 @@ # RFC 0004 — Branching Reasoning Extensions (ToT / GoT) -**Status:** Draft -**Author:** Byron / Open CoT Community -**Created:** 2026‑04‑14 -**Target Version:** Schema v0.2 +**Status:** Draft +**Author:** Byron / Open CoT Community +**Created:** 2026‑04‑14 +**Target Version:** Schema v0.2 **Discussion:** https://github.com/supernovae/open-cot/discussions/4 --- ## 1. Summary -This RFC proposes **branching and graph‑structured reasoning extensions** to the Open CoT Reasoning Schema. +This RFC proposes **branching and graph‑structured reasoning extensions** to the Open CoT Reasoning Schema. It enables representation of: -- **Tree‑of‑Thoughts (ToT)** -- **Graph‑of‑Thoughts (GoT)** -- **multi‑path exploration** -- **search‑based reasoning** -- **beam search / BFS / DFS reasoning** -- **pruned branches** -- **branch scoring and selection** +- **Tree‑of‑Thoughts (ToT)** +- **Graph‑of‑Thoughts (GoT)** +- **multi‑path exploration** +- **search‑based reasoning** +- **beam search / BFS / DFS reasoning** +- **pruned branches** +- **branch scoring and selection** These extensions build on RFC 0001 (Initial Reasoning Schema) and introduce new fields and conventions for representing non‑linear reasoning. 
@@ -27,21 +27,21 @@ These extensions build on RFC 0001 (Initial Reasoning Schema) and introduce new Linear Chain‑of‑Thought is insufficient for many reasoning tasks: -- Complex planning -- Mathematical proofs -- Multi‑step coding tasks -- Search problems -- Multi‑hypothesis reasoning -- RL‑trained long‑horizon reasoning +- Complex planning +- Mathematical proofs +- Multi‑step coding tasks +- Search problems +- Multi‑hypothesis reasoning +- RL‑trained long‑horizon reasoning Research such as **Tree‑of‑Thoughts** and **Graph‑of‑Thoughts** demonstrates that **branching exploration** significantly improves performance. However: -- No open standard exists for representing branching reasoning. -- Existing implementations use ad‑hoc formats. -- Agent frameworks cannot interoperate. -- RL pipelines cannot train on structured search traces. +- No open standard exists for representing branching reasoning. +- Existing implementations use ad‑hoc formats. +- Cognitive pipeline frameworks cannot interoperate. +- RL pipelines cannot train on structured search traces. - Datasets cannot represent multi‑path reasoning. This RFC defines a **unified, interoperable, graph‑friendly extension** to the reasoning schema. @@ -51,18 +51,18 @@ This RFC defines a **unified, interoperable, graph‑friendly extension** to the ## 3. Design Goals ### 3.1 Must‑Have Goals -- Support **trees** (ToT) and **graphs** (GoT). -- Support **branch scoring** and **pruning**. -- Support **multiple candidate paths**. -- Maintain compatibility with RFC 0001. -- Allow **partial or full exploration traces**. -- Support **search algorithms** (BFS, DFS, beam search). +- Support **trees** (ToT) and **graphs** (GoT). +- Support **branch scoring** and **pruning**. +- Support **multiple candidate paths**. +- Maintain compatibility with RFC 0001. +- Allow **partial or full exploration traces**. +- Support **search algorithms** (BFS, DFS, beam search). - Support **RL reward propagation** across branches. 
### 3.2 Non‑Goals -- Defining a specific search algorithm. -- Enforcing a particular branching strategy. -- Representing full agent state machines. +- Defining a specific search algorithm. +- Enforcing a particular branching strategy. +- Representing full cognitive pipeline state machines. - Encoding model weights or proprietary metadata. --- @@ -71,19 +71,19 @@ This RFC defines a **unified, interoperable, graph‑friendly extension** to the A reasoning trace may contain: -- **nodes** (steps) -- **edges** (parent → child relationships) -- **branch groups** (sets of alternative paths) -- **branch scores** (model‑assigned or verifier‑assigned) -- **pruned branches** (optional) +- **nodes** (steps) +- **edges** (parent → child relationships) +- **branch groups** (sets of alternative paths) +- **branch scores** (model‑assigned or verifier‑assigned) +- **pruned branches** (optional) This RFC introduces: -- `branch_group` -- `branch_score` -- `pruned` -- `exploration_strategy` -- `path_id` +- `branch_group` +- `branch_score` +- `pruned` +- `exploration_strategy` +- `path_id` These fields extend the existing step structure. @@ -225,7 +225,7 @@ This RFC will be accepted when: At least 3 maintainers approve it. A reference implementation can parse branching traces. At least one ToT or GoT example dataset is converted. -At least one agent loop emits branching traces. +At least one cognitive pipeline emits branching traces. ## 10. Conclusion diff --git a/rfcs/0007-agent-loop-protocol.md b/rfcs/0007-cognitive-pipeline-protocol.md similarity index 83% rename from rfcs/0007-agent-loop-protocol.md rename to rfcs/0007-cognitive-pipeline-protocol.md index 9c0260f..016fc88 100644 --- a/rfcs/0007-agent-loop-protocol.md +++ b/rfcs/0007-cognitive-pipeline-protocol.md @@ -1,4 +1,4 @@ -# RFC 0007 — Governed Execution FSM (Agent Loop Protocol) +# RFC 0007 — Governed Execution FSM (Cognitive Pipeline Protocol) **Status:** Draft **Author:** Byron / Open CoT Community @@ -10,7 +10,7 @@ ## 1. 
Summary -This RFC replaces the earlier linear **Agent Loop Protocol** with the **Governed Execution FSM**: a fourteen-state finite state machine that defines how Open CoT agents move from accepted input to sealed audit output under explicit authority, policy, and tooling constraints. +This RFC replaces the earlier linear **Cognitive Pipeline Protocol** with the **Governed Execution FSM**: a fourteen-state finite state machine that defines how Open CoT pipelines move from accepted input to sealed audit output under explicit authority, policy, and tooling constraints. Open CoT is positioned as a **cognitive control plane**. The FSM is the normative contract between model output (proposals only), schema validation, harness enforcement, policy evaluation, delegation, tool execution, and audit. The governing principle is strict: **the model cannot self-authorize**. The model may request capabilities; only the harness, policy engine, and authorized brokers may grant, narrow, or deny them. @@ -20,9 +20,9 @@ This document specifies state semantics, allowed transitions, receipt obligation ## 2. Motivation -Ad-hoc agent loops conflate planning, permissioning, tool use, and completion. That makes it easy for a model’s natural-language output to be mistaken for authorization, for tool calls to run without an auditable grant chain, and for policy to be applied inconsistently before versus after side effects occur. +Ad-hoc cognitive pipelines conflate planning, permissioning, tool use, and completion. That makes it easy for a model’s natural-language output to be mistaken for authorization, for tool calls to run without an auditable grant chain, and for policy to be applied inconsistently before versus after side effects occur. 
-A governed FSM separates **proposal** from **commitment**: structured envelopes express intent; the harness validates against schema; policy decides; an auth broker narrows grants; tools run only in one state with consumed permissions; receipts prove what happened; a terminal audit state seals integrity. Implementers get a single interoperable execution backbone suitable for regulated, cost-aware, and sandboxed deployments (see RFC 0017, RFC 0038). +A governed FSM separates **proposal** from **commitment**: structured envelopes express intent; the harness validates against schema; policy evaluates; an auth broker narrows grants; tools run only in one state with consumed permissions; receipts prove what happened; a terminal audit state seals integrity. Implementers get a single interoperable execution backbone suitable for regulated, cost-aware, and sandboxed deployments (see RFC 0017, RFC 0038). --- @@ -47,7 +47,7 @@ A governed FSM separates **proposal** from **commitment**: structured envelopes ## 4. Architectural thesis -Normative separation of roles: **models propose**; **schemas express**; **harnesses validate and drive state**; **policy decides** at consultation points (with per-capability evaluation in `validate_authority`); **auth brokers narrow** to `AuthorityReceipt` with `granted_scope ≤ requested_scope` (non-forwardable by default); **tools execute only** in `execute_tool` under valid grant with **atomic** permission consumption (RFC 0048); **audit seals** the trace in `audit_seal`. 
+Normative separation of roles: **models propose**; **schemas express**; **harnesses validate and drive state**; **policy evaluates** at consultation points (with per-capability evaluation in `validate_authority`); **auth brokers narrow** to `AuthorityReceipt` with `granted_scope ≤ requested_scope` (non-forwardable by default); **tools execute only** in `execute_tool` under valid grant with **atomic** permission consumption (RFC 0048); **audit seals** the trace in `audit_seal`. --- @@ -88,7 +88,7 @@ Implementations **MUST** consult policy (RFC 0041) at minimum on entry to: `fram ## 7. Cross-references -Extends: **RFC 0001** ([Reasoning / trace structure](0001-initial-schema.md)); **RFC 0003** ([Tool invocation](0003-tool-invocation-schema.md)); **RFC 0017** ([Sandbox](0017-agent-safety-sandboxing.md)); **RFC 0038** ([Budget](0038-cost-aware-reasoning-budget.md) → `termination: budget_exhausted`); **RFC 0041** ([Policy](0041-policy-enforcement-schema.md)); **RFC 0042** ([Permissions](0042-permission-acl.md)). Delegation payloads (**RFC 0047**) and receipts / audit envelope (**RFC 0048**) are authoritative for artifact bodies; **this RFC owns control flow and attachment points**. +Extends: **RFC 0001** ([Reasoning / trace structure](0001-initial-schema.md)); **RFC 0003** ([Tool invocation](0003-tool-invocation-schema.md)); **RFC 0017** ([Sandbox](0017-runtime-safety-sandboxing.md)); **RFC 0038** ([Budget](0038-cost-aware-reasoning-budget.md) → `termination: budget_exhausted`); **RFC 0041** ([Policy](0041-policy-enforcement-schema.md)); **RFC 0042** ([Permissions](0042-permission-acl.md)). Delegation payloads (**RFC 0047**) and receipts / audit envelope (**RFC 0048**) are authoritative for artifact bodies; **this RFC owns control flow and attachment points**. 
--- @@ -279,8 +279,8 @@ Illustrative trace (artifact shapes per RFCs 0001, 0003, 0047, 0048): **`receive ### 10.1 FSM flexibility versus strict sequencing - **Question:** Must every tool pass through `request_authority` even when policy already allows a tool class? -- **Decision:** The transition map is **normative**, but agents **MAY** skip delegation states for capabilities covered by **standing authorization**, using the **`plan -> execute_tool`** shortcut. The harness **MUST** record how standing authorization satisfies the dispatch obligation in the tool execution receipt. -- **Rationale:** Keeps enterprise-grade governance while preserving the ergonomics of a simple sandboxed agent loop. +- **Decision:** The transition map is **normative**, but pipelines **MAY** skip delegation states for capabilities covered by **standing authorization**, using the **`plan -> execute_tool`** shortcut. The harness **MUST** record how standing authorization satisfies the dispatch obligation in the tool execution receipt. +- **Rationale:** Keeps enterprise-grade governance while preserving the ergonomics of a simple sandboxed cognitive pipeline. ### 10.2 Multi-tool execution @@ -309,4 +309,4 @@ This RFC should be considered ready for **Implementers’ Draft** when: ## 12. Conclusion -The Governed Execution FSM turns the Open CoT agent loop into an explicit, permission-aware control plane: proposals are typed and validated, authority is brokered and narrowed, tools run only under receipts, failures quarantine unsafe knowledge, and every run seals into an auditable envelope. Together with RFCs 0001, 0003, 0017, 0038, 0041, 0042, 0047, and 0048, it provides a serious, implementable standard for trustworthy agent execution. 
+The Governed Execution FSM turns the Open CoT cognitive pipeline into an explicit, permission-aware control plane: proposals are typed and validated, authority is brokered and narrowed, tools run only under receipts, failures quarantine unsafe knowledge, and every run seals into an auditable envelope. Together with RFCs 0001, 0003, 0017, 0038, 0041, 0042, 0047, and 0048, it provides a serious, implementable standard for trustworthy cognitive pipeline execution. diff --git a/rfcs/0008-dataset-packaging-standard.md b/rfcs/0008-dataset-packaging-standard.md index 3877dcf..9ac9f31 100644 --- a/rfcs/0008-dataset-packaging-standard.md +++ b/rfcs/0008-dataset-packaging-standard.md @@ -1,8 +1,8 @@ # RFC 0008 — Dataset Packaging Standard (v0.1) -**Status:** Draft -**Author:** Byron / Open CoT Community -**Created:** 2026‑04‑14 -**Target Version:** Schema v0.3 +**Status:** Draft +**Author:** Byron / Open CoT Community +**Created:** 2026‑04‑14 +**Target Version:** Schema v0.3 **Discussion:** https://github.com/supernovae/open-cot/discussions/8 --- @@ -12,14 +12,14 @@ This RFC defines the **Dataset Packaging Standard** for distributing, sharing, a It provides a unified, model‑agnostic format for packaging: -- reasoning traces (RFC 0001) -- verifier outputs (RFC 0002) -- tool invocation logs (RFC 0003) -- branching structures (RFC 0004) -- RL reward traces (RFC 0005) -- multi‑verifier ensembles (RFC 0006) +- reasoning traces (RFC 0001) +- verifier outputs (RFC 0002) +- tool invocation logs (RFC 0003) +- branching structures (RFC 0004) +- RL reward traces (RFC 0005) +- multi‑verifier ensembles (RFC 0006) -The goal is to make reasoning datasets **portable, reproducible, inspectable, and interoperable** across training pipelines, evaluation harnesses, and agent frameworks. +The goal is to make reasoning datasets **portable, reproducible, inspectable, and interoperable** across training pipelines, evaluation harnesses, and cognitive pipeline frameworks. 
--- @@ -27,46 +27,46 @@ The goal is to make reasoning datasets **portable, reproducible, inspectable, an Reasoning datasets today are fragmented: -- Some contain raw CoT text with no structure. -- Some contain tool calls but no observations. -- Some contain verifier labels but no reward signals. -- Some contain branching traces but no metadata. -- Some are stored as loose JSON files with no manifest. +- Some contain raw CoT text with no structure. +- Some contain tool calls but no observations. +- Some contain verifier labels but no reward signals. +- Some contain branching traces but no metadata. +- Some are stored as loose JSON files with no manifest. This fragmentation makes it difficult to: -- train RL‑based reasoning models -- evaluate step‑level correctness -- reproduce agent trajectories -- share datasets across frameworks -- benchmark models consistently +- train RL‑based reasoning models +- evaluate step‑level correctness +- reproduce cognitive pipeline trajectories +- share datasets across frameworks +- benchmark models consistently This RFC defines a **standard packaging format** so that reasoning datasets can be: -- versioned -- validated -- merged -- sharded -- streamed -- consumed by any training or evaluation pipeline +- versioned +- validated +- merged +- sharded +- streamed +- consumed by any training or evaluation pipeline --- ## 3. Design Goals ### 3.1 Must‑Have Goals -- Support all schemas from RFC 0001–0007. -- Provide a clear directory structure. -- Provide a dataset manifest. -- Support sharding and streaming. -- Support dataset‑level metadata. -- Support dataset validation. -- Support partial datasets (e.g., traces only, rewards only). +- Support all schemas from RFC 0001–0007. +- Provide a clear directory structure. +- Provide a dataset manifest. +- Support sharding and streaming. +- Support dataset‑level metadata. +- Support dataset validation. +- Support partial datasets (e.g., traces only, rewards only). 
### 3.2 Non‑Goals -- Defining a specific training format (e.g., HF datasets). -- Defining a specific compression format. -- Defining a universal licensing model. +- Defining a specific training format (e.g., HF datasets). +- Defining a specific compression format. +- Defining a universal licensing model. - Encoding model weights or training logs. --- diff --git a/rfcs/0009-reward-fusion-specification.md b/rfcs/0009-reward-fusion-specification.md index 3354415..859f0d4 100644 --- a/rfcs/0009-reward-fusion-specification.md +++ b/rfcs/0009-reward-fusion-specification.md @@ -1,8 +1,8 @@ # RFC 0009 — Reward Fusion Specification (v0.1) -**Status:** Draft -**Author:** Byron / Open CoT Community -**Created:** 2026‑04‑14 -**Target Version:** Schema v0.3 +**Status:** Draft +**Author:** Byron / Open CoT Community +**Created:** 2026‑04‑14 +**Target Version:** Schema v0.3 **Discussion:** https://github.com/supernovae/open-cot/discussions/9 --- @@ -12,18 +12,18 @@ This RFC defines the **Reward Fusion Specification**, a standard for combining m It extends: -- RFC 0005 — RL Reward Trace Schema -- RFC 0006 — Multi‑Verifier Ensemble Schema -- RFC 0007 — Agent Loop Protocol +- RFC 0005 — RL Reward Trace Schema +- RFC 0006 — Multi‑Verifier Ensemble Schema +- RFC 0007 — Cognitive Pipeline Protocol The goal is to provide a **consistent, transparent, and reproducible** method for merging: -- verifier‑based rewards -- heuristic rewards -- human feedback rewards -- branch‑level rewards -- trajectory‑level rewards -- ensemble‑derived rewards +- verifier‑based rewards +- heuristic rewards +- human feedback rewards +- branch‑level rewards +- trajectory‑level rewards +- ensemble‑derived rewards into a single, fused reward signal. @@ -33,39 +33,39 @@ into a single, fused reward signal. Modern reasoning models (DeepSeek‑R1, Qwen‑R1, etc.) 
rely on **multiple reward sources**, including: -- symbolic verifiers -- neural verifiers -- rule‑based heuristics -- human preference models -- branch‑level search scores -- trajectory‑level correctness signals +- symbolic verifiers +- neural verifiers +- rule‑based heuristics +- human preference models +- branch‑level search scores +- trajectory‑level correctness signals However: -- Reward signals often conflict. -- Pipelines use incompatible fusion strategies. -- RL training requires a single scalar reward per step or trajectory. -- Datasets cannot share fused reward traces without a standard. -- Ensemble verifiers (RFC 0006) require downstream fusion. +- Reward signals often conflict. +- Pipelines use incompatible fusion strategies. +- RL training requires a single scalar reward per step or trajectory. +- Datasets cannot share fused reward traces without a standard. +- Ensemble verifiers (RFC 0006) require downstream fusion. -This RFC defines a **unified reward fusion standard** to ensure interoperability across datasets, RL pipelines, and agent frameworks. +This RFC defines a **unified reward fusion standard** to ensure interoperability across datasets, RL pipelines, and cognitive pipeline frameworks. --- ## 3. Design Goals ### 3.1 Must‑Have Goals -- Support step‑level, branch‑level, and trajectory‑level reward fusion. -- Support multiple fusion strategies. -- Support weighted and confidence‑weighted fusion. -- Support deterministic and reproducible fusion. -- Integrate cleanly with RFC 0005 and RFC 0006. +- Support step‑level, branch‑level, and trajectory‑level reward fusion. +- Support multiple fusion strategies. +- Support weighted and confidence‑weighted fusion. +- Support deterministic and reproducible fusion. +- Integrate cleanly with RFC 0005 and RFC 0006. - Support RL‑ready scalar reward outputs. ### 3.2 Non‑Goals -- Defining a universal reward function. -- Mandating a specific RL algorithm. -- Encoding model weights or training logs. 
+- Defining a universal reward function. +- Mandating a specific RL algorithm. +- Encoding model weights or training logs. - Replacing verifier outputs or ensemble outputs. --- @@ -74,13 +74,13 @@ This RFC defines a **unified reward fusion standard** to ensure interoperability Reward fusion operates over three levels: -1. **Step‑level fusion** +1. **Step‑level fusion** Combine rewards for individual reasoning steps. -2. **Branch‑level fusion** +2. **Branch‑level fusion** Combine rewards for alternative reasoning paths (ToT/GoT). -3. **Trajectory‑level fusion** +3. **Trajectory‑level fusion** Combine global rewards for the entire reasoning trace. Each level may use different fusion strategies. @@ -283,9 +283,9 @@ User‑defined fusion logic (metadata required). This RFC will be accepted when: -- At least 3 maintainers approve it. -- A reference implementation performs reward fusion. -- At least one RL pipeline consumes fused rewards. +- At least 3 maintainers approve it. +- A reference implementation performs reward fusion. +- At least one RL pipeline consumes fused rewards. - At least one dataset includes fused reward traces. 
--- @@ -294,8 +294,8 @@ This RFC will be accepted when: This RFC defines the **Reward Fusion Specification**, enabling: -- unified reward signals -- multi‑verifier integration -- RL‑ready reward traces -- consistent evaluation -- reproducible training +- unified reward signals +- multi‑verifier integration +- RL‑ready reward traces +- consistent evaluation +- reproducible training diff --git a/rfcs/0010-agent-memory-schema.md b/rfcs/0010-cognitive-context-schema.md similarity index 85% rename from rfcs/0010-agent-memory-schema.md rename to rfcs/0010-cognitive-context-schema.md index b807371..d1f7b3b 100644 --- a/rfcs/0010-agent-memory-schema.md +++ b/rfcs/0010-cognitive-context-schema.md @@ -1,4 +1,4 @@ -# RFC 0010 — Agent Memory Schema (v0.1) +# RFC 0010 — Cognitive pipeline Memory Schema (v0.1) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026‑04‑14 @@ -8,22 +8,22 @@ ## 1. Summary -This RFC defines the **Agent Memory Schema**, a structured, model‑agnostic format for representing persistent and ephemeral memory used by LLM‑based agents during reasoning, planning, and tool‑augmented execution. +This RFC defines the **Cognitive pipeline Memory Schema**, a structured, model‑agnostic format for representing persistent and ephemeral memory used by LLM‑based pipelines during reasoning, planning, and tool‑augmented execution. It extends and complements: - RFC 0001 — Reasoning Schema - RFC 0003 — Tool Invocation Schema - RFC 0004 — Branching Reasoning Extensions -- RFC 0007 — Agent Loop Protocol +- RFC 0007 — Cognitive Pipeline Protocol -The goal is to standardize how agents store, retrieve, update, and serialize memory across steps, episodes, and tasks. +The goal is to standardize how pipelines store, retrieve, update, and serialize memory across steps, episodes, and tasks. --- ## 2. 
Motivation -Modern agents require memory for: +Modern pipelines require memory for: - tracking intermediate results - storing retrieved knowledge @@ -33,7 +33,7 @@ Modern agents require memory for: - tracking failures and retries - maintaining long‑horizon context - supporting multi‑step planning -- enabling multi‑agent collaboration +- enabling multi‑cognitive pipeline collaboration Today, memory formats are: @@ -41,7 +41,7 @@ Today, memory formats are: - framework‑specific - unstructured - difficult to serialize -- incompatible across agent systems +- incompatible across cognitive pipeline systems This RFC defines a **unified, interoperable memory schema** that supports: @@ -60,9 +60,9 @@ This RFC defines a **unified, interoperable memory schema** that supports: - Support multiple memory types (short‑term, long‑term, tool, episodic). - Support structured, typed memory entries. - Support deterministic serialization and replay. -- Support integration with the Agent Loop Protocol (RFC 0007). +- Support integration with the Cognitive Pipeline Protocol (RFC 0007). - Support memory updates, deletions, and versioning. -- Support multi‑agent memory isolation. +- Support multi‑cognitive pipeline memory isolation. ### 3.2 Non‑Goals - Defining a universal memory retrieval algorithm. @@ -74,7 +74,7 @@ This RFC defines a **unified, interoperable memory schema** that supports: ## 4. Memory Model -The Agent Memory Schema defines four categories: +The Cognitive pipeline Memory Schema defines four categories: ### 4.1 Short‑Term Memory (STM) Ephemeral memory used within a single reasoning episode. @@ -119,7 +119,7 @@ Examples: ```json { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "OpenCoT Agent Memory Schema v0.1", + "title": "OpenCoT Cognitive pipeline Memory Schema v0.1", "type": "object", "properties": { @@ -129,9 +129,9 @@ Examples: "description": "Schema version." 
}, - "agent_id": { + "requester_id": { "type": "string", - "description": "Unique identifier for the agent." + "description": "Unique identifier for the cognitive pipeline." }, "short_term_memory": { @@ -194,7 +194,7 @@ Examples: } }, - "required": ["version", "agent_id"] + "required": ["version", "requester_id"] } ``` @@ -284,7 +284,7 @@ This RFC will be accepted when: - At least 3 maintainers approve it. - A reference implementation can serialize and deserialize memory. -- At least one agent framework uses this schema. +- At least one cognitive pipeline framework uses this schema. - At least one dataset includes memory snapshots. --- @@ -292,11 +292,11 @@ This RFC will be accepted when: ## 11. Conclusion -This RFC defines the **Agent Memory Schema**, enabling: +This RFC defines the **Cognitive pipeline Memory Schema**, enabling: - structured memory - deterministic replay - multi‑episode reasoning - tool‑augmented memory - long‑horizon planning -- multi‑agent compatibility \ No newline at end of file +- multi‑cognitive pipeline compatibility \ No newline at end of file diff --git a/rfcs/0011-multi-agent-protocol.md b/rfcs/0011-multi-party-cognition-protocol.md similarity index 64% rename from rfcs/0011-multi-agent-protocol.md rename to rfcs/0011-multi-party-cognition-protocol.md index 2669a7e..91ff2f7 100644 --- a/rfcs/0011-multi-agent-protocol.md +++ b/rfcs/0011-multi-party-cognition-protocol.md @@ -1,4 +1,4 @@ -# RFC 0011 — Multi‑Agent Protocol (v0.1) +# RFC 0011 — Multi‑Cognitive pipeline Protocol (v0.1) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026‑04‑14 @@ -8,23 +8,23 @@ ## 1. Summary -This RFC defines the **Multi‑Agent Protocol**, a standardized framework for coordinating multiple LLM‑based agents that collaborate, compete, or specialize across tasks. 
+This RFC defines the **Multi‑Cognitive pipeline Protocol**, a standardized framework for coordinating multiple LLM‑based pipelines that collaborate, compete, or specialize across tasks. It extends: - RFC 0001 — Reasoning Schema - RFC 0003 — Tool Invocation Schema - RFC 0004 — Branching Reasoning Extensions -- RFC 0007 — Agent Loop Protocol -- RFC 0010 — Agent Memory Schema +- RFC 0007 — Cognitive Pipeline Protocol +- RFC 0010 — Cognitive pipeline Memory Schema -The goal is to define a **clean, interoperable protocol** for multi‑agent systems that exchange structured messages, share memory selectively, and coordinate reasoning. +The goal is to define a **clean, interoperable protocol** for multi‑cognitive pipeline systems that exchange structured messages, share memory selectively, and coordinate reasoning. --- ## 2. Motivation -Multi‑agent systems are increasingly important for: +Multi‑cognitive pipeline systems are increasingly important for: - decomposition of complex tasks - specialization (planner, coder, verifier, critic, executor) @@ -34,42 +34,42 @@ Multi‑agent systems are increasingly important for: - multi‑step planning - multi‑modal collaboration -Today, multi‑agent frameworks are: +Today, multi‑cognitive pipeline frameworks are: - inconsistent - unstructured - incompatible - difficult to serialize or replay -This RFC defines a **unified multi‑agent protocol** for structured reasoning ecosystems. +This RFC defines a **unified multi‑cognitive pipeline protocol** for structured reasoning ecosystems. --- ## 3. 
Design Goals ### 3.1 Must‑Have Goals -- Support structured agent‑to‑agent messages +- Support structured pipeline‑to‑pipeline messages - Support shared and private memory (RFC 0010) -- Support agent roles and capabilities +- Support cognitive pipeline roles and capabilities - Support deterministic replay -- Support multi‑agent reasoning graphs -- Support tool‑augmented multi‑agent workflows +- Support multi‑cognitive pipeline reasoning graphs +- Support tool‑augmented multi‑cognitive pipeline workflows ### 3.2 Non‑Goals -- Defining a universal agent architecture +- Defining a universal cognitive pipeline architecture - Defining a universal communication algorithm - Encoding model weights or training logs --- -## 4. Multi‑Agent Model +## 4. Multi‑Cognitive Pipeline Model -A multi‑agent system consists of: +A multi‑cognitive pipeline system consists of: -- **agents[]** — each with identity, role, capabilities +- **pipelines[]** — each with identity, role, capabilities - **messages[]** — structured communication events - **shared_memory** — optional global memory -- **private_memory** — per‑agent memory (RFC 0010) +- **private_memory** — per‑pipeline memory (RFC 0010) - **coordination_strategy** — optional (planner, auction, voting, etc.) 
--- @@ -79,22 +79,22 @@ A multi‑agent system consists of: ```json { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "OpenCoT Multi-Agent Protocol v0.2", + "title": "OpenCoT Multi-Party Protocol v0.2", "type": "object", "properties": { "version": { "type": "string", "enum": ["0.2"] }, - "agents": { + "pipelines": { "type": "array", "items": { "type": "object", "properties": { - "agent_id": { "type": "string" }, + "requester_id": { "type": "string" }, "role": { "type": "string" }, "capabilities": { "type": "array", "items": { "type": "string" } } }, - "required": ["agent_id", "role"] + "required": ["requester_id", "role"] } }, @@ -119,7 +119,7 @@ A multi‑agent system consists of: "coordination_strategy": { "type": "string" } }, - "required": ["version", "agents", "messages"] + "required": ["version", "pipelines", "messages"] } ``` @@ -129,9 +129,9 @@ A multi‑agent system consists of: ```json { - "agents": [ - { "agent_id": "planner", "role": "planner" }, - { "agent_id": "coder", "role": "executor" } + "pipelines": [ + { "requester_id": "planner", "role": "planner" }, + { "requester_id": "coder", "role": "executor" } ], "messages": [ { @@ -152,16 +152,16 @@ A multi‑agent system consists of: ### 7.1 Messaging mode - **Decision:** Broadcast is supported as an explicit delivery mode in message metadata. -- **Rationale:** Multi-agent collaboration often requires fan-out coordination. +- **Rationale:** Multi-cognitive pipeline collaboration often requires fan-out coordination. - **Normative requirement:** Messages **MUST** support unicast delivery; broadcast **MAY** be used with explicit recipient semantics. - **Migration note:** Existing ad hoc broadcast conventions should be normalized through a delivery-mode field. -### 7.2 Agent groups +### 7.2 Cognitive pipeline groups -- **Decision:** Agent grouping is supported through optional `group_id` metadata. +- **Decision:** Cognitive pipeline grouping is supported through optional `group_id` metadata. 
- **Rationale:** Group semantics improve role orchestration without requiring topology hard-coding. - **Normative requirement:** Group membership **MAY** be declared; when declared, group IDs **MUST** be stable within a session. -- **Migration note:** Role-only systems can incrementally adopt groups without breaking existing agent identifiers. +- **Migration note:** Role-only systems can incrementally adopt groups without breaking existing cognitive pipeline identifiers. ### 7.3 Coordination strategy @@ -175,11 +175,11 @@ A multi‑agent system consists of: ## 8. Acceptance Criteria - Reference implementation -- Multi‑agent dataset -- Multi‑agent agent loop +- Multi‑cognitive pipeline dataset +- Multi‑party cognitive pipeline --- ## 9. Conclusion -This RFC defines the **Multi‑Agent Protocol**, enabling structured multi‑agent collaboration. +This RFC defines the **Multi‑Cognitive pipeline Protocol**, enabling structured multi‑cognitive pipeline collaboration. diff --git a/rfcs/0013-memory-compression-embedding.md b/rfcs/0013-memory-compression-embedding.md index b1a17b5..0bbc4a7 100644 --- a/rfcs/0013-memory-compression-embedding.md +++ b/rfcs/0013-memory-compression-embedding.md @@ -1,36 +1,36 @@ # RFC 0013 — Memory Compression & Embedding (v0.1) -**Status:** Draft -**Author:** Byron / Open CoT Community -**Created:** 2026‑04‑14 -**Target Version:** Schema v0.4 +**Status:** Draft +**Author:** Byron / Open CoT Community +**Created:** 2026‑04‑14 +**Target Version:** Schema v0.4 **Discussion:** https://github.com/supernovae/open-cot/discussions/13 --- ## 1. Summary -This RFC defines standards for **compressing, embedding, and summarizing agent memory** to support scalable long‑term memory (LTM) and efficient retrieval. +This RFC defines standards for **compressing, embedding, and summarizing cognitive pipeline memory** to support scalable long‑term memory (LTM) and efficient retrieval. 
It extends: -- RFC 0010 — Agent Memory Schema -- RFC 0007 — Agent Loop Protocol +- RFC 0010 — Cognitive pipeline Memory Schema +- RFC 0007 — Cognitive Pipeline Protocol --- ## 2. Motivation -Agents accumulate: +Pipelines accumulate: -- thousands of STM entries -- millions of LTM entries -- episodic logs -- tool state +- thousands of STM entries +- millions of LTM entries +- episodic logs +- tool state Without compression: -- memory becomes unbounded -- retrieval becomes slow -- serialization becomes expensive +- memory becomes unbounded +- retrieval becomes slow +- serialization becomes expensive This RFC defines **compression, summarization, and embedding formats**. @@ -38,11 +38,11 @@ This RFC defines **compression, summarization, and embedding formats**. ## 3. Design Goals -- Support lossy and lossless compression -- Support embedding‑based memory -- Support summarization chains -- Support provenance tracking -- Support deterministic replay +- Support lossy and lossless compression +- Support embedding‑based memory +- Support summarization chains +- Support provenance tracking +- Support deterministic replay --- @@ -50,11 +50,11 @@ This RFC defines **compression, summarization, and embedding formats**. 
Memory entries may include: -- `raw_value` -- `compressed_value` -- `embedding` -- `summary` -- `provenance` +- `raw_value` +- `compressed_value` +- `embedding` +- `summary` +- `provenance` --- @@ -82,7 +82,7 @@ Memory entries may include: ```json { "key": "project_history", - "summary": "Agent completed 12 tasks related to GPU provisioning.", + "summary": "Cognitive pipeline completed 12 tasks related to GPU provisioning.", "embedding": { "vector": [0.12, 0.44], "dim": 2 } } ``` diff --git a/rfcs/0014-memory-conflict-resolution.md b/rfcs/0014-memory-conflict-resolution.md index 13aa775..f58d70d 100644 --- a/rfcs/0014-memory-conflict-resolution.md +++ b/rfcs/0014-memory-conflict-resolution.md @@ -1,45 +1,45 @@ # RFC 0014 — Memory Conflict Resolution (v0.1) -**Status:** Draft -**Author:** Byron / Open CoT Community -**Created:** 2026‑04‑14 -**Target Version:** Schema v0.4 +**Status:** Draft +**Author:** Byron / Open CoT Community +**Created:** 2026‑04‑14 +**Target Version:** Schema v0.4 **Discussion:** https://github.com/supernovae/open-cot/discussions/14 --- ## 1. Summary -This RFC defines the **Memory Conflict Resolution Standard**, a unified mechanism for resolving conflicting entries in agent memory, including: +This RFC defines the **Memory Conflict Resolution Standard**, a unified mechanism for resolving conflicting entries in cognitive pipeline memory, including: -- short‑term memory (STM) -- long‑term memory (LTM) -- episodic memory -- tool memory +- short‑term memory (STM) +- long‑term memory (LTM) +- episodic memory +- tool memory - compressed/embedded memory (RFC 0013) It extends: -- RFC 0010 — Agent Memory Schema -- RFC 0013 — Memory Compression & Embedding +- RFC 0010 — Cognitive pipeline Memory Schema +- RFC 0013 — Memory Compression & Embedding --- ## 2. 
Motivation -Agents accumulate memory from: +Pipelines accumulate memory from: -- multiple tools -- multiple agents (RFC 0011) -- multiple episodes -- multiple verifiers -- multiple reward signals +- multiple tools +- multiple pipelines (RFC 0011) +- multiple episodes +- multiple verifiers +- multiple reward signals Conflicts arise when: -- two entries have the same key -- two entries disagree -- two entries differ in confidence -- two entries differ in provenance -- two entries differ in timestamp +- two entries have the same key +- two entries disagree +- two entries differ in confidence +- two entries differ in provenance +- two entries differ in timestamp This RFC defines a **deterministic, reproducible conflict resolution algorithm**. @@ -47,12 +47,12 @@ This RFC defines a **deterministic, reproducible conflict resolution algorithm** ## 3. Design Goals -- Deterministic conflict resolution -- Support for confidence‑weighted merging -- Support for timestamp‑based precedence -- Support for provenance‑aware resolution -- Support for lossy and lossless merging -- Support for multi‑agent memory isolation +- Deterministic conflict resolution +- Support for confidence‑weighted merging +- Support for timestamp‑based precedence +- Support for provenance‑aware resolution +- Support for lossy and lossless merging +- Support for multi‑cognitive pipeline memory isolation --- @@ -157,4 +157,4 @@ User‑defined logic. ## 8. Conclusion -This RFC defines deterministic, reproducible memory conflict resolution for all agent memory types. +This RFC defines deterministic, reproducible memory conflict resolution for all cognitive pipeline memory types. 
diff --git a/rfcs/0015-multi-agent-reward-sharing.md b/rfcs/0015-multi-party-reward-sharing.md similarity index 68% rename from rfcs/0015-multi-agent-reward-sharing.md rename to rfcs/0015-multi-party-reward-sharing.md index 3c38ce1..10223aa 100644 --- a/rfcs/0015-multi-agent-reward-sharing.md +++ b/rfcs/0015-multi-party-reward-sharing.md @@ -1,4 +1,4 @@ -# RFC 0015 — Multi‑Agent Reward Sharing (v0.1) +# RFC 0015 — Multi‑Cognitive pipeline Reward Sharing (v0.1) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026‑04‑14 @@ -8,25 +8,25 @@ ## 1. Summary -This RFC defines the **Multi‑Agent Reward Sharing Standard**, enabling structured reward distribution across multiple collaborating or competing agents. +This RFC defines the **Multi‑Cognitive pipeline Reward Sharing Standard**, enabling structured reward distribution across multiple collaborating or competing pipelines. It extends: - RFC 0005 — RL Reward Trace Schema -- RFC 0011 — Multi‑Agent Protocol +- RFC 0011 — Multi‑Cognitive pipeline Protocol --- ## 2. Motivation -Multi‑agent systems require reward sharing for: +Multi‑cognitive pipeline systems require reward sharing for: - cooperative tasks - competitive tasks - hierarchical planning - self‑play - distributed tool use -- multi‑agent RL +- multi‑cognitive pipeline RL Without a standard: @@ -41,10 +41,10 @@ This RFC defines a **unified reward sharing schema**. ## 3. Reward Sharing Models ### 3.1 Cooperative -All agents share the same reward. +All pipelines share the same reward. ### 3.2 Competitive -Agents receive opposing rewards. +Pipelines receive opposing rewards. ### 3.3 Mixed Some rewards are shared, some are individual. @@ -63,7 +63,7 @@ User‑defined reward mapping. { "version": "0.1", "trace_id": "string", - "agents": ["planner", "coder", "verifier"], + "pipelines": ["planner", "coder", "verifier"], "reward_model": "cooperative", "agent_rewards": { "planner": 1.0, @@ -93,4 +93,4 @@ User‑defined reward mapping. ## 6. 
Conclusion -This RFC standardizes reward sharing across multi‑agent systems. +This RFC standardizes reward sharing across multi‑cognitive pipeline systems. diff --git a/rfcs/0016-tool-capability-negotiation.md b/rfcs/0016-tool-capability-negotiation.md index 9fbad5d..51bf4bb 100644 --- a/rfcs/0016-tool-capability-negotiation.md +++ b/rfcs/0016-tool-capability-negotiation.md @@ -1,24 +1,24 @@ # RFC 0016 — Tool Capability Negotiation (v0.1) -**Status:** Draft -**Author:** Byron / Open CoT Community -**Created:** 2026‑04‑14 -**Target Version:** Schema v0.4 +**Status:** Draft +**Author:** Byron / Open CoT Community +**Created:** 2026‑04‑14 +**Target Version:** Schema v0.4 **Discussion:** https://github.com/supernovae/open-cot/discussions/16 --- ## 1. Summary -This RFC defines the **Tool Capability Negotiation Standard**, enabling agents to: +This RFC defines the **Tool Capability Negotiation Standard**, enabling pipelines to: -- discover tool capabilities -- negotiate tool parameters -- validate tool compatibility -- adapt reasoning based on tool constraints +- discover tool capabilities +- negotiate tool parameters +- validate tool compatibility +- adapt reasoning based on tool constraints It extends: -- RFC 0003 — Tool Invocation Schema -- RFC 0007 — Agent Loop Protocol +- RFC 0003 — Tool Invocation Schema +- RFC 0007 — Cognitive Pipeline Protocol --- @@ -26,14 +26,14 @@ It extends: Tools vary in: -- input formats -- output formats -- rate limits -- supported operations -- authentication requirements -- cost models +- input formats +- output formats +- rate limits +- supported operations +- authentication requirements +- cost models -Agents must negotiate capabilities before invoking tools. +Pipelines must negotiate capabilities before invoking tools. This RFC defines a **structured negotiation protocol**. @@ -41,13 +41,13 @@ This RFC defines a **structured negotiation protocol**. ## 3. 
Capability Types -- `input_schema` -- `output_schema` -- `supported_operations` -- `cost_per_call` -- `max_batch_size` -- `authentication_required` -- `version` +- `input_schema` +- `output_schema` +- `supported_operations` +- `cost_per_call` +- `max_batch_size` +- `authentication_required` +- `version` --- @@ -56,7 +56,7 @@ This RFC defines a **structured negotiation protocol**. ```json { "tool_name": "string", - "agent_id": "string", + "requester_id": "string", "requested_capabilities": ["input_schema", "supported_operations"], "tool_response": { "input_schema": {}, diff --git a/rfcs/0017-agent-safety-sandboxing.md b/rfcs/0017-runtime-safety-sandboxing.md similarity index 67% rename from rfcs/0017-agent-safety-sandboxing.md rename to rfcs/0017-runtime-safety-sandboxing.md index 99d26ac..25c7b14 100644 --- a/rfcs/0017-agent-safety-sandboxing.md +++ b/rfcs/0017-runtime-safety-sandboxing.md @@ -1,4 +1,4 @@ -# RFC 0017 — Agent Safety & Sandboxing (v0.1) +# RFC 0017 — Cognitive pipeline Safety & Sandboxing (v0.1) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026‑04‑14 @@ -8,25 +8,25 @@ ## 1. Summary -This RFC defines the **Agent Safety & Sandboxing Standard**, a unified mechanism for restricting, monitoring, and validating agent actions, tool calls, and memory access. +This RFC defines the **Cognitive pipeline Safety & Sandboxing Standard**, a unified mechanism for restricting, monitoring, and validating cognitive pipeline actions, tool calls, and memory access. It extends: - RFC 0003 — Tool Invocation Schema -- RFC 0007 — Agent Loop Protocol -- RFC 0010 — Agent Memory Schema +- RFC 0007 — Cognitive Pipeline Protocol +- RFC 0010 — Cognitive pipeline Memory Schema --- ## 2. 
Motivation -Agents require safety boundaries to prevent: +Pipelines require safety boundaries to prevent: - unsafe tool calls - unauthorized memory access - infinite loops - excessive branching -- unsafe multi‑agent interactions +- unsafe multi‑cognitive pipeline interactions - unbounded resource usage This RFC defines a **sandbox layer** that enforces constraints. @@ -39,7 +39,7 @@ This RFC defines a **sandbox layer** that enforces constraints. - **Memory Safety** — read/write permissions, key‑level ACLs - **Execution Safety** — step limits, recursion limits - **Branching Safety** — max branches, pruning rules -- **Multi‑Agent Safety** — message filtering, role isolation +- **Multi‑Cognitive pipeline Safety** — message filtering, role isolation --- @@ -49,14 +49,14 @@ This RFC defines a **sandbox layer** that enforces constraints. ```json { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Open CoT RFC 0017 — Agent Safety & Sandboxing Configuration", - "description": "Defines sandbox policies that constrain agent behavior at runtime: which tools are permitted, step/branch limits, and memory access controls.", + "title": "Open CoT RFC 0017 — Cognitive pipeline Safety & Sandboxing Configuration", + "description": "Defines sandbox policies that constrain cognitive pipeline behavior at runtime: which tools are permitted, step/branch limits, and memory access controls.", "type": "object", "properties": { "allowed_tools": { "type": "array", "items": { "type": "string" }, - "description": "Tool names the agent may invoke. Use [\"*\"] to allow all." + "description": "Tool names the cognitive pipeline may invoke. Use [\"*\"] to allow all." }, "blocked_tools": { "type": "array", @@ -75,7 +75,7 @@ This RFC defines a **sandbox layer** that enforces constraints. 
}, "memory_acl": { "type": "object", - "description": "Access control list mapping role or agent IDs to permission arrays.", + "description": "Access control list mapping role or requester IDs to permission arrays.", "additionalProperties": { "type": "array", "items": { @@ -121,4 +121,4 @@ This RFC defines a **sandbox layer** that enforces constraints. ## 6. Conclusion -This RFC defines a unified safety and sandboxing layer for agent execution. +This RFC defines a unified safety and sandboxing layer for cognitive pipeline execution. diff --git a/rfcs/0018-tool-error-taxonomy.md b/rfcs/0018-tool-error-taxonomy.md index be7f811..fdbd563 100644 --- a/rfcs/0018-tool-error-taxonomy.md +++ b/rfcs/0018-tool-error-taxonomy.md @@ -13,7 +13,7 @@ This RFC defines a structured error taxonomy for tool invocation failures and de It extends: - RFC 0003 — Tool Invocation Schema -- RFC 0007 — Agent Loop Protocol +- RFC 0007 — Cognitive Pipeline Protocol --- diff --git a/rfcs/0019-multi-agent-planning-graphs.md b/rfcs/0019-collaborative-planning-graphs.md similarity index 69% rename from rfcs/0019-multi-agent-planning-graphs.md rename to rfcs/0019-collaborative-planning-graphs.md index 01a66c8..63e4314 100644 --- a/rfcs/0019-multi-agent-planning-graphs.md +++ b/rfcs/0019-collaborative-planning-graphs.md @@ -1,4 +1,4 @@ -# RFC 0019 — Multi‑Agent Planning Graphs (v0.1) +# RFC 0019 — Multi‑Cognitive pipeline Planning Graphs (v0.1) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026‑04‑14 @@ -8,18 +8,18 @@ ## 1. Summary -This RFC defines the **Multi‑Agent Planning Graph Schema**, enabling structured representation of collaborative planning across multiple agents. +This RFC defines the **Multi‑Cognitive pipeline Planning Graph Schema**, enabling structured representation of collaborative planning across multiple pipelines. 
It extends: - RFC 0004 — Branching Reasoning Extensions -- RFC 0011 — Multi‑Agent Protocol +- RFC 0011 — Multi‑Cognitive pipeline Protocol --- ## 2. Motivation -Multi‑agent systems require: +Multi‑cognitive pipeline systems require: - shared planning graphs - role‑based subgraphs @@ -35,7 +35,7 @@ This RFC defines a **graph‑based planning representation**. - **nodes** — tasks, subgoals, tool calls - **edges** — dependencies -- **owners** — agent responsible for each node +- **owners** — cognitive pipeline responsible for each node - **status** — pending, running, done, failed --- @@ -78,4 +78,4 @@ This RFC defines a **graph‑based planning representation**. ## 6. Conclusion -This RFC defines a unified planning graph for multi‑agent coordination. +This RFC defines a unified planning graph for multi‑cognitive pipeline coordination. diff --git a/rfcs/0021-agent-capability-declaration.md b/rfcs/0021-capability-declaration.md similarity index 67% rename from rfcs/0021-agent-capability-declaration.md rename to rfcs/0021-capability-declaration.md index e2e59f5..9cec294 100644 --- a/rfcs/0021-agent-capability-declaration.md +++ b/rfcs/0021-capability-declaration.md @@ -1,4 +1,4 @@ -# RFC 0021 — Agent Capability Declaration (v0.1) +# RFC 0021 — Cognitive pipeline Capability Declaration (v0.1) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026‑04‑14 @@ -8,11 +8,11 @@ ## 1. 
Summary -This RFC defines a **capability declaration format** for agents, enabling: +This RFC defines a **capability declaration format** for pipelines, enabling: - capability discovery - compatibility checks -- multi‑agent coordination +- multi‑cognitive pipeline coordination - tool negotiation (RFC 0016) --- @@ -24,7 +24,7 @@ This RFC defines a **capability declaration format** for agents, enabling: - tool use - memory - verification -- multi‑agent communication +- multi‑cognitive pipeline communication - safety level --- @@ -33,7 +33,7 @@ This RFC defines a **capability declaration format** for agents, enabling: ```json { - "agent_id": "planner", + "requester_id": "planner", "capabilities": { "reasoning": true, "planning": true, @@ -50,7 +50,7 @@ This RFC defines a **capability declaration format** for agents, enabling: ```json { - "agent_id": "coder", + "requester_id": "coder", "capabilities": { "tool_use": ["compiler", "executor"] } @@ -61,4 +61,4 @@ This RFC defines a **capability declaration format** for agents, enabling: ## 5. Conclusion -This RFC defines a unified capability declaration for agents. +This RFC defines a unified capability declaration for pipelines. 
diff --git a/rfcs/0022-agent-evaluation-protocol.md b/rfcs/0022-cognitive-evaluation-protocol.md similarity index 96% rename from rfcs/0022-agent-evaluation-protocol.md rename to rfcs/0022-cognitive-evaluation-protocol.md index b4a54fd..6ca2090 100644 --- a/rfcs/0022-agent-evaluation-protocol.md +++ b/rfcs/0022-cognitive-evaluation-protocol.md @@ -1,4 +1,4 @@ -# RFC 0022 — Agent Evaluation Protocol (v0.1) +# RFC 0022 — Cognitive pipeline Evaluation Protocol (v0.1) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026-04-14 @@ -46,7 +46,7 @@ It integrates RFC 0001 (trace), RFC 0008 (dataset packaging), RFC 0029 (benchmar ```json { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Open CoT RFC 0022 — Agent Evaluation Protocol", + "title": "Open CoT RFC 0022 — Cognitive pipeline Evaluation Protocol", "type": "object", "properties": { "version": { "type": "string", "enum": ["0.1"] }, diff --git a/rfcs/0023-humain-in-the-loop-schema.md b/rfcs/0023-humain-in-the-loop-schema.md index 645bc6a..9e3b5ce 100644 --- a/rfcs/0023-humain-in-the-loop-schema.md +++ b/rfcs/0023-humain-in-the-loop-schema.md @@ -4,9 +4,9 @@ ## 1. Summary -This RFC defines the **Human-in-the-Loop (HITL) Interaction Schema** for Open-CoT, the cognitive control plane for governed agent execution. It standardizes how agents **request** human judgment (`approval`, `clarification`, `review`, `override`), how supervisors **respond**, and how responses **resume** execution. Payloads are transport-agnostic (UIs, tickets, chatops, async queues). +This RFC defines the **Human-in-the-Loop (HITL) Interaction Schema** for Open-CoT, the cognitive control plane for governed cognitive pipeline execution. It standardizes how pipelines **request** human judgment (`approval`, `clarification`, `review`, `override`), how supervisors **respond**, and how responses **resume** execution. Payloads are transport-agnostic (UIs, tickets, chatops, async queues). 
-In [RFC 0007](0007-agent-loop-protocol.md), HITL maps to **`escalate`**: the run pauses on a `human_interaction_request` correlated to `run_id`, `agent_id`, and `step_ref`, until a `human_interaction_response`, **timeout**, or cancellation. +In [RFC 0007](0007-cognitive-pipeline-protocol.md), HITL maps to **`escalate`**: the run pauses on a `human_interaction_request` correlated to `run_id`, `requester_id`, and `step_ref`, until a `human_interaction_response`, **timeout**, or cancellation. ## 2. Motivation @@ -16,9 +16,9 @@ In [RFC 0007](0007-agent-loop-protocol.md), HITL maps to **`escalate`**: the run **Types:** `approval` (sign-off before side effects), `clarification` (disambiguation), `review` (artifact review), `override` (supersede prior decisions within bounds). **`options[]`** holds `{ id, label, description?, risk_hint? }`; `approval`/`clarification` SHOULD include options for deterministic automation. **`human_interaction_response.decision`** is `approved`, `rejected`, `modified`, or `timeout`; `modified` SHOULD carry **`justification`** (extensions hold extra payload). -**`urgency`** (`low`…`critical`) affects queueing only—not ACL bypass. **`timeout_seconds`** bounds wait before auto-`timeout` (policy defines deny vs retry). **`context`** MUST include `run_id`, `agent_id`, `step_ref`. **`requested_by.agent`** identifies the principal; **`presented_to.human`** names role, person, or queue. +**`urgency`** (`low`…`critical`) affects queueing only—not ACL bypass. **`timeout_seconds`** bounds wait before auto-`timeout` (policy defines deny vs retry). **`context`** MUST include `run_id`, `requester_id`, `step_ref`. **`requested_by.pipeline`** identifies the principal; **`presented_to.human`** names role, person, or queue. -**FSM:** On **`escalate`**, emit `human_interaction_request` before the governed action.
On `approved` (+ `selected_option` when options exist), resume per [RFC 0007](0007-agent-loop-protocol.md) toward `validate_authority` / `observe_result`. On `rejected` or timeout-as-deny, do not perform the blocked effect without new delegation ([RFC 0047](0047-delegation-extension.md)). +**FSM:** On **`escalate`**, emit `human_interaction_request` before the governed action. On `approved` (+ `selected_option` when options exist), resume per [RFC 0007](0007-cognitive-pipeline-protocol.md) toward `validate_authority` / `observe_result`. On `rejected` or timeout-as-deny, do not perform the blocked effect without new delegation ([RFC 0047](0047-delegation-extension.md)). ## 4. JSON Schema @@ -55,19 +55,19 @@ In [RFC 0007](0007-agent-loop-protocol.md), HITL maps to **`escalate`**: the run "context": { "type": "object", "additionalProperties": false, - "required": ["run_id", "agent_id", "step_ref"], + "required": ["run_id", "requester_id", "step_ref"], "properties": { "run_id": { "type": "string", "minLength": 1 }, - "agent_id": { "type": "string", "minLength": 1 }, + "requester_id": { "type": "string", "minLength": 1 }, "step_ref": { "type": "string", "minLength": 1 } } }, "requested_by": { "type": "object", "additionalProperties": false, - "required": ["agent"], + "required": ["pipeline"], "properties": { - "agent": { "type": "string", "minLength": 1 }, + "pipeline": { "type": "string", "minLength": 1 }, "role": { "type": "string" } } }, @@ -121,8 +121,8 @@ In [RFC 0007](0007-agent-loop-protocol.md), HITL maps to **`escalate`**: the run ], "urgency": "high", "timeout_seconds": 900, - "context": { "run_id": "run_7b91", "agent_id": "support-agent-prod", "step_ref": "plan/12/tool/sql.execute" }, - "requested_by": { "agent": "support-agent-prod", "role": "tier2" }, + "context": { "run_id": "run_7b91", "requester_id": "support-cognitive-pipeline-prod", "step_ref": "plan/12/tool/sql.execute" }, + "requested_by": { "pipeline": "support-cognitive-pipeline-prod", "role": 
"tier2" }, "presented_to": { "human": "oncall-db", "queue": "risk-review", "channel": "pager" } } ``` @@ -140,8 +140,8 @@ In [RFC 0007](0007-agent-loop-protocol.md), HITL maps to **`escalate`**: the run ], "urgency": "medium", "timeout_seconds": 3600, - "context": { "run_id": "run_2aa4", "agent_id": "records-agent", "step_ref": "plan/4/delegate/archive_policy" }, - "requested_by": { "agent": "records-agent" }, + "context": { "run_id": "run_2aa4", "requester_id": "records-cognitive pipeline", "step_ref": "plan/4/delegate/archive_policy" }, + "requested_by": { "pipeline": "records-cognitive pipeline" }, "presented_to": { "human": "legal-ops", "queue": "clarifications" } } ``` @@ -150,7 +150,7 @@ In [RFC 0007](0007-agent-loop-protocol.md), HITL maps to **`escalate`**: the run | RFC | Title | Relationship | |-----|--------|----------------| -| [RFC 0007](0007-agent-loop-protocol.md) | Agent Loop Protocol | `escalate` pause/resume. | +| [RFC 0007](0007-cognitive-pipeline-protocol.md) | Cognitive Pipeline Protocol | `escalate` pause/resume. | | [RFC 0041](0041-policy-enforcement-schema.md) | Policy Enforcement | `require_approval` → typed requests. | | [RFC 0042](0042-permission-acl.md) | Permissions & ACL | Human-consent grants bind `request_id` / context. | | [RFC 0047](0047-delegation-extension.md) | Delegation | Overrides may require re-delegation. | @@ -160,13 +160,13 @@ In [RFC 0007](0007-agent-loop-protocol.md), HITL maps to **`escalate`**: the run | Question | Resolution | |----------|------------| -| Subsume RLHF / eval feedback ([RFC 0005](0005-rl-reward-trace-schema.md), [RFC 0022](0022-agent-evaluation-protocol.md))? | **No**—those are training/eval traces; this is **runtime governance**. | +| Subsume RLHF / eval feedback ([RFC 0005](0005-rl-reward-trace-schema.md), [RFC 0022](0022-cognitive-evaluation-protocol.md))? | **No**—those are training/eval traces; this is **runtime governance**. | | Mandatory `options`? 
| **SHOULD** for `approval`/`clarification`; optional for `review`/`override` if freeform is allowed. | -| Who may respond? | **`responder_id`** MUST be authenticated; tie to [RFC 0026](0026-agent-identity-auth.md) where possible. | +| Who may respond? | **`responder_id`** MUST be authenticated; tie to [RFC 0026](0026-requester-identity-auth.md) where possible. | ## 8. Acceptance Criteria -1. Each `human_interaction_request` validates and includes `context.run_id`, `context.agent_id`, `context.step_ref`. +1. Each `human_interaction_request` validates and includes `context.run_id`, `context.requester_id`, `context.step_ref`. 2. Each `human_interaction_response` references `request_id` and a normative `decision`. -3. [RFC 0007](0007-agent-loop-protocol.md) implementations MUST emit these records on `escalate` for [RFC 0041](0041-policy-enforcement-schema.md) `require_approval` when using this profile. +3. [RFC 0007](0007-cognitive-pipeline-protocol.md) implementations MUST emit these records on `escalate` for [RFC 0041](0041-policy-enforcement-schema.md) `require_approval` when using this profile. 4. Auto-timeout responses use `decision: "timeout"`; policy documents timeout semantics and clocks. diff --git a/rfcs/0024-multi-modal-reasoning-schema.md b/rfcs/0024-multi-modal-reasoning-schema.md index f359bc9..31d9961 100644 --- a/rfcs/0024-multi-modal-reasoning-schema.md +++ b/rfcs/0024-multi-modal-reasoning-schema.md @@ -8,7 +8,7 @@ ## 1. 
Summary -This RFC defines a **multi‑modal extension** to the reasoning schema, enabling agents to reason over: +This RFC defines a **multi‑modal extension** to the reasoning schema, enabling pipelines to reason over: - text - images diff --git a/rfcs/0026-agent-identity-auth.md b/rfcs/0026-requester-identity-auth.md similarity index 66% rename from rfcs/0026-agent-identity-auth.md rename to rfcs/0026-requester-identity-auth.md index f54af3c..7d5d04f 100644 --- a/rfcs/0026-agent-identity-auth.md +++ b/rfcs/0026-requester-identity-auth.md @@ -1,4 +1,4 @@ -# RFC 0026 — Agent Identity & Authentication (v0.2) +# RFC 0026 — Cognitive pipeline Identity & Authentication (v0.2) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026-04-14 @@ -8,17 +8,17 @@ ## 1. Summary -Open CoT is a **cognitive control plane**: the harness mediates every transition between reasoning, policy, delegation, and tool execution. **Agent identity** is how the harness answers *who* is requesting authority. Every delegation request, permission grant, and audit event references a stable `agent_id` and the trust metadata defined here. +Open CoT is a **cognitive control plane**: the harness mediates every transition between reasoning, policy, delegation, and tool execution. **Cognitive pipeline identity** is how the harness answers *who* is requesting authority. Every delegation request, permission grant, and audit event references a stable `requester_id` and the trust metadata defined here. This RFC specifies the normative **AgentIdentity** record: identifiers, human-facing labels, operational role, trust tier, declared capabilities (claims only—never grants), optional cryptographic material for signature verification, optional delegation lineage, timestamps, and extensible metadata. 
-**Cross-references:** [RFC 0007 — Agent Loop / FSM](0007-agent-loop-protocol.md) (governed states bind identity to transitions); [RFC 0041 — Policy Enforcement](0041-policy-enforcement-schema.md) (subject matching against `agent_id` and trust); [RFC 0042 — Permissions](0042-permission-acl.md) (`granted_to` and ACL subjects); [RFC 0047 — Delegation Extension](0047-delegation-extension.md) (`delegation_request.requester` MUST resolve to a verified identity). +**Cross-references:** [RFC 0007 — Cognitive Pipeline / FSM](0007-cognitive-pipeline-protocol.md) (governed states bind identity to transitions); [RFC 0041 — Policy Enforcement](0041-policy-enforcement-schema.md) (subject matching against `requester_id` and trust); [RFC 0042 — Permissions](0042-permission-acl.md) (`granted_to` and ACL subjects); [RFC 0047 — Delegation Extension](0047-delegation-extension.md) (`delegation_request.requester` MUST resolve to a verified identity). --- ## 2. Motivation -Without a typed identity model, frameworks conflate “the model said so” with authorization, lose auditability across sub-agents, and cannot express pre-authorized tool tiers consistently. A single **AgentIdentity** schema lets policy engines match rules, lets permission stores attach grants to principals, and lets delegation receipts bind authority to a **verified** `agent_id`. +Without a typed identity model, frameworks conflate “the model said so” with authorization, lose auditability across sub-pipelines, and cannot express pre-authorized tool tiers consistently. A single **AgentIdentity** schema lets policy engines match rules, lets permission stores attach grants to principals, and lets delegation receipts bind authority to a **verified** `requester_id`. 
--- @@ -26,14 +26,14 @@ Without a typed identity model, frameworks conflate “the model said so” with | Field | Required | Description | |-------|----------|-------------| -| `agent_id` | yes | Globally unique identifier for this principal within the deployment (URI-safe string). | +| `requester_id` | yes | Globally unique identifier for this principal within the deployment (URI-safe string). | | `display_name` | yes | Human-readable label for UIs and logs. | | `role` | yes | Operational role enum (orchestration vs execution vs verification vs delegated vs custom). | | `trust_level` | yes | Harness-defined trust tier affecting default pre-authorization posture. | -| `capabilities_declared` | yes | Array of capability strings this agent **claims** to support (advertisement only; grants live in RFC 0042). | +| `capabilities_declared` | yes | Array of capability strings this cognitive pipeline **claims** to support (advertisement only; grants live in RFC 0042). | | `public_key` | no | Public key material (encoding defined by deployment; often PEM or base64 raw key). | | `key_algorithm` | no | Algorithm identifier, e.g. `ed25519`, `p256`. MUST be present if `public_key` is set. | -| `parent_agent_id` | no | If this identity is a delegated sub-agent, the `agent_id` of the delegator. | +| `parent_requester_id` | no | If this identity is a delegated sub-cognitive pipeline, the `requester_id` of the delegator. | | `created_at` | yes | RFC 3339 timestamp when this identity record was first registered. | | `metadata` | yes | Extensible object for org-specific attributes (team, tenant, labels). MAY be empty `{}`. | @@ -48,8 +48,8 @@ Trust levels describe **default harness posture** for pre-authorized tooling and | `untrusted` | No tools pre-authorized; every sensitive action flows through explicit delegation unless a standing grant says otherwise. | | `low` | Basic **read-only** tools MAY be pre-authorized per deployment policy (e.g., search, calculators). 
| | `medium` | A **standard** curated tool set MAY be pre-authorized (still subject to policy and audit). | -| `high` | **Broad** pre-authorization for vetted agents; the harness MUST still refuse **self-granted** writes—writes require receipts or human/policy decisions. | -| `system` | **Harness-internal** identity (scheduler, broker, policy adapter). MUST NOT be assigned to model-backed agents. | +| `high` | **Broad** pre-authorization for vetted pipelines; the harness MUST still refuse **self-granted** writes—writes require receipts or human/policy decisions. | +| `system` | **Harness-internal** identity (scheduler, broker, policy adapter). MUST NOT be assigned to model-backed pipelines. | --- @@ -59,13 +59,13 @@ Trust levels describe **default harness posture** for pre-authorized tooling and ```json { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://opencot.dev/schema/rfc0026/agent-identity.json", - "title": "Open CoT RFC 0026 — Agent Identity", + "$id": "https://opencot.dev/schema/rfc0026/requester-identity.json", + "title": "Open CoT RFC 0026 — Cognitive pipeline Identity", "type": "object", "additionalProperties": false, "properties": { "schema_version": { "type": "string", "enum": ["0.2"] }, - "agent_id": { "type": "string", "minLength": 1, "pattern": "^[A-Za-z0-9._:@/-]+$" }, + "requester_id": { "type": "string", "minLength": 1, "pattern": "^[A-Za-z0-9._:@/-]+$" }, "display_name": { "type": "string", "minLength": 1 }, "role": { "type": "string", @@ -81,13 +81,13 @@ Trust levels describe **default harness posture** for pre-authorized tooling and }, "public_key": { "type": "string" }, "key_algorithm": { "type": "string" }, - "parent_agent_id": { "type": "string", "minLength": 1 }, + "parent_requester_id": { "type": "string", "minLength": 1 }, "created_at": { "type": "string", "format": "date-time" }, "metadata": { "type": "object" } }, "required": [ "schema_version", - "agent_id", + "requester_id", "display_name", "role", 
"trust_level", @@ -114,7 +114,7 @@ Trust levels describe **default harness posture** for pre-authorized tooling and ```json { "schema_version": "0.2", - "agent_id": "agent:org/acme/planner-main", + "requester_id": "cognitive-pipeline:org/acme/planner-main", "display_name": "Acme Planner", "role": "orchestrator", "trust_level": "high", @@ -131,12 +131,12 @@ Trust levels describe **default harness posture** for pre-authorized tooling and ```json { "schema_version": "0.2", - "agent_id": "agent:org/acme/exec-worker-07", + "requester_id": "cognitive-pipeline:org/acme/exec-worker-07", "display_name": "Delegated worker 07", "role": "delegated", "trust_level": "low", "capabilities_declared": ["tool:email.read_headers"], - "parent_agent_id": "agent:org/acme/planner-main", + "parent_requester_id": "cognitive-pipeline:org/acme/planner-main", "created_at": "2026-04-14T12:05:00Z", "metadata": {} } @@ -149,18 +149,18 @@ Trust levels describe **default harness posture** for pre-authorized tooling and | Question | Resolution | |----------|------------| | Are `capabilities_declared` normative for policy? | **No.** They are **hints** and audit context. Enforcement uses explicit permissions ([RFC 0042](0042-permission-acl.md)) and delegation receipts ([RFC 0047](0047-delegation-extension.md)). | -| Can a model agent use `trust_level: system`? | **No.** `system` is reserved for harness components; registrars MUST reject assignment to model-backed identities. | +| Can a model cognitive pipeline use `trust_level: system`? | **No.** `system` is reserved for harness components; registrars MUST reject assignment to model-backed identities. | | Encoding of `public_key`? | **Deployment-defined.** The schema only requires `key_algorithm` when a key is present; transports SHOULD document encoding (PEM vs raw). | --- ## 8. Acceptance criteria -1. 
Every `DelegationRequest.requester` ([RFC 0047](0047-delegation-extension.md)) MUST equal an `agent_id` registered in the harness identity store conforming to this schema. -2. Policy `subject` fields ([RFC 0041](0041-policy-enforcement-schema.md)) MAY reference `agent_id`, `role`, `trust_level`, and labels in `metadata`. -3. Permission grants ([RFC 0042](0042-permission-acl.md)) MUST bind to `agent_id` (or a group resolved to agents), never to free-text model self-identification. +1. Every `DelegationRequest.requester` ([RFC 0047](0047-delegation-extension.md)) MUST equal a `requester_id` registered in the harness identity store conforming to this schema. +2. Policy `subject` fields ([RFC 0041](0041-policy-enforcement-schema.md)) MAY reference `requester_id`, `role`, `trust_level`, and labels in `metadata`. +3. Permission grants ([RFC 0042](0042-permission-acl.md)) MUST bind to `requester_id` (or a group resolved to pipelines), never to free-text model self-identification. 4. Implementations MUST treat `capabilities_declared` as non-authoritative for allow/deny decisions unless a separate grant references them. -5. Validators MUST reject `public_key` without `key_algorithm`, and reject `trust_level: system` on identities tagged as model agents in the deployment registry. +5. Validators MUST reject `public_key` without `key_algorithm`, and reject `trust_level: system` on identities tagged as model pipelines in the deployment registry. 
--- diff --git a/rfcs/0027-distributed-agent-execution-protocol.md b/rfcs/0027-distributed-execution-protocol.md similarity index 77% rename from rfcs/0027-distributed-agent-execution-protocol.md rename to rfcs/0027-distributed-execution-protocol.md index 714b46d..be9afe0 100644 --- a/rfcs/0027-distributed-agent-execution-protocol.md +++ b/rfcs/0027-distributed-execution-protocol.md @@ -1,12 +1,12 @@ -# RFC 0027 — Distributed Agent Execution Protocol, Status: Draft, Author: Open CoT Community, Created: 2026-04-14 +# RFC 0027 — Distributed Cognitive pipeline Execution Protocol, Status: Draft, Author: Open CoT Community, Created: 2026-04-14 **Discussion:** https://github.com/supernovae/open-cot/discussions/27 ## 1. Summary -This RFC defines the **Distributed Agent Execution Protocol** for Open-CoT: **`execution_node`** advertisements, **`task_assignment`** with explicit **`delegated_scope`**, and terminal **`execution_result`** records (hashes, trace pointers, **`receipt_refs[]`**). Each node runs its own governed FSM ([RFC 0007](0007-agent-loop-protocol.md)); coordination is explicit. +This RFC defines the **Distributed Cognitive pipeline Execution Protocol** for Open-CoT: **`execution_node`** advertisements, **`task_assignment`** with explicit **`delegated_scope`**, and terminal **`execution_result`** records (hashes, trace pointers, **`receipt_refs[]`**). Each node runs its own governed FSM ([RFC 0007](0007-cognitive-pipeline-protocol.md)); coordination is explicit. -Participants have distinct identities ([RFC 0026](0026-agent-identity-auth.md)) and MUST obtain authority independently ([RFC 0047](0047-delegation-extension.md)). [RFC 0048](0048-execution-receipts-audit-envelopes.md) receipts chain cross-node work. +Participants have distinct identities ([RFC 0026](0026-requester-identity-auth.md)) and MUST obtain authority independently ([RFC 0047](0047-delegation-extension.md)). [RFC 0048](0048-execution-receipts-audit-envelopes.md) receipts chain cross-node work. 
## 2. Motivation @@ -14,9 +14,9 @@ Regulated workloads need sandboxes, residency, and blast-radius isolation. Ad ho ## 3. Design -**`execution_node`:** `node_id` (unique), `agent_id` ([RFC 0026](0026-agent-identity-auth.md)), `endpoint` (URL, queue, etc.), `capabilities[]` (e.g. `code.exec`, `tool.invoke`; see [RFC 0016](0016-tool-capability-negotiation.md)), `trust_level` (`low` \| `medium` \| `high`), `status` (`active` \| `draining` \| `offline`). +**`execution_node`:** `node_id` (unique), `requester_id` ([RFC 0026](0026-requester-identity-auth.md)), `endpoint` (URL, queue, etc.), `capabilities[]` (e.g. `code.exec`, `tool.invoke`; see [RFC 0016](0016-tool-capability-negotiation.md)), `trust_level` (`low` \| `medium` \| `high`), `status` (`active` \| `draining` \| `offline`). -**`task_assignment`:** `assignment_id`, `task_hash` (canonical task bytes), `assigned_to` (`node_id`), `parent_run_id` ([RFC 0007](0007-agent-loop-protocol.md)), `delegated_scope` (at minimum `summary`; MAY include `allowed_tools`, `max_risk_level`, `valid_until`) proven under [RFC 0047](0047-delegation-extension.md), `timeout_seconds`, `priority`. +**`task_assignment`:** `assignment_id`, `task_hash` (canonical task bytes), `assigned_to` (`node_id`), `parent_run_id` ([RFC 0007](0007-cognitive-pipeline-protocol.md)), `delegated_scope` (at minimum `summary`; MAY include `allowed_tools`, `max_risk_level`, `valid_until`) proven under [RFC 0047](0047-delegation-extension.md), `timeout_seconds`, `priority`. **`execution_result`:** `assignment_id`, `node_id`, `status` (`completed` \| `failed` \| `timeout`), `result_hash`, `trace_ref`, `receipt_refs[]` ([RFC 0048](0048-execution-receipts-audit-envelopes.md)). @@ -36,10 +36,10 @@ Regulated workloads need sandboxes, residency, and blast-radius isolation. 
Ad ho "execution_node": { "type": "object", "additionalProperties": false, - "required": ["node_id", "agent_id", "endpoint", "capabilities", "trust_level", "status"], + "required": ["node_id", "requester_id", "endpoint", "capabilities", "trust_level", "status"], "properties": { "node_id": { "type": "string", "minLength": 1 }, - "agent_id": { "type": "string", "minLength": 1 }, + "requester_id": { "type": "string", "minLength": 1 }, "endpoint": { "type": "string", "minLength": 1 }, "capabilities": { "type": "array", "items": { "type": "string", "minLength": 1 } }, "trust_level": { "type": "string", "enum": ["low", "medium", "high"] }, @@ -120,8 +120,8 @@ The scheduler MUST select `assigned_to` such that the node’s `node_id` matches | RFC | Title | Relationship | |-----|--------|----------------| -| [RFC 0007](0007-agent-loop-protocol.md) | Agent Loop Protocol | Per-node FSM. | -| [RFC 0026](0026-agent-identity-auth.md) | Agent Identity | Node `agent_id` + authn/z. | +| [RFC 0007](0007-cognitive-pipeline-protocol.md) | Cognitive Pipeline Protocol | Per-node FSM. | +| [RFC 0026](0026-requester-identity-auth.md) | Cognitive pipeline Identity | Node `requester_id` + authn/z. | | [RFC 0047](0047-delegation-extension.md) | Delegation | `delegated_scope` vs proofs. | | [RFC 0048](0048-execution-receipts-audit-envelopes.md) | Execution Receipts | `receipt_refs`. | | [RFC 0016](0016-tool-capability-negotiation.md) | Capability Negotiation | Routing on `capabilities`. 
| diff --git a/rfcs/0028-agent-to-environment-schema.md b/rfcs/0028-capability-environment-schema.md similarity index 69% rename from rfcs/0028-agent-to-environment-schema.md rename to rfcs/0028-capability-environment-schema.md index 420b7e8..816bc7c 100644 --- a/rfcs/0028-agent-to-environment-schema.md +++ b/rfcs/0028-capability-environment-schema.md @@ -1,4 +1,4 @@ -# RFC 0028 — Agent‑to‑Environment Interaction Schema (v0.1) +# RFC 0028 — Cognitive pipeline‑to‑Environment Interaction Schema (v0.1) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026‑04‑14 @@ -8,7 +8,7 @@ ## 1. Summary -This RFC defines a schema for **agent interactions with external environments**, including: +This RFC defines a schema for **cognitive pipeline interactions with external environments**, including: - simulators - APIs @@ -52,4 +52,4 @@ This RFC defines a schema for **agent interactions with external environments**, ## 5. Conclusion -This RFC defines a unified schema for agent‑environment loops. +This RFC defines a unified schema for cognitive pipeline‑environment loops. diff --git a/rfcs/0029-agent-benchmark-dataset.md b/rfcs/0029-cognitive-benchmark-dataset.md similarity index 66% rename from rfcs/0029-agent-benchmark-dataset.md rename to rfcs/0029-cognitive-benchmark-dataset.md index 6cf6507..0c80c10 100644 --- a/rfcs/0029-agent-benchmark-dataset.md +++ b/rfcs/0029-cognitive-benchmark-dataset.md @@ -1,4 +1,4 @@ -# RFC 0029 — Agent Benchmark Dataset Format (v0.1) +# RFC 0029 — Cognitive pipeline Benchmark Dataset Format (v0.1) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026‑04‑14 @@ -8,17 +8,17 @@ ## 1. 
Summary -This RFC defines a **benchmark dataset format** for evaluating agents across: +This RFC defines a **benchmark dataset format** for evaluating pipelines across: - reasoning - planning - tool use - safety -- multi‑agent coordination +- multi‑cognitive pipeline coordination It extends: -- RFC 0022 — Agent Evaluation Protocol +- RFC 0022 — Cognitive pipeline Evaluation Protocol --- @@ -57,4 +57,4 @@ It extends: ## 5. Conclusion -This RFC defines a benchmark dataset format for agent evaluation. +This RFC defines a benchmark dataset format for cognitive pipeline evaluation. diff --git a/rfcs/0030-agent-lifecycle-versioning.md b/rfcs/0030-runtime-lifecycle-versioning.md similarity index 71% rename from rfcs/0030-agent-lifecycle-versioning.md rename to rfcs/0030-runtime-lifecycle-versioning.md index 3cda454..212fd55 100644 --- a/rfcs/0030-agent-lifecycle-versioning.md +++ b/rfcs/0030-runtime-lifecycle-versioning.md @@ -1,16 +1,16 @@ -# RFC 0030 — Agent Lifecycle & Versioning, Status: Draft, Author: Open CoT Community, Created: 2026-04-14 +# RFC 0030 — Cognitive pipeline Lifecycle & Versioning, Status: Draft, Author: Open CoT Community, Created: 2026-04-14 **Discussion:** https://github.com/supernovae/open-cot/discussions/30 ## 1. Summary -This RFC defines **agent lifecycle states** and **versioning** of agent configurations for Open-CoT. Long-running governed agents need a model where **configuration can evolve** (prompts, tools, policies) without silently mutating in-flight runs or breaking permission grants. The **`agent_lifecycle`** record binds an **`agent_id`** to a semantic **`version`**, a **`lifecycle_state`**, capability and policy references, and a **`configuration_hash`** for reproducibility. **`version_transition`** documents approved rollout strategies (**rolling**, **blue-green**, **canary**) and whether **rollback** is permitted. 
+This RFC defines **cognitive pipeline lifecycle states** and **versioning** of cognitive pipeline configurations for Open-CoT. Long-running governed pipelines need a model where **configuration can evolve** (prompts, tools, policies) without silently mutating in-flight runs or breaking permission grants. The **`agent_lifecycle`** record binds a **`requester_id`** to a semantic **`version`**, a **`lifecycle_state`**, capability and policy references, and a **`configuration_hash`** for reproducibility. **`version_transition`** documents approved rollout strategies (**rolling**, **blue-green**, **canary**) and whether **rollback** is permitted. -Lifecycle and versioning intersect **identity** ([RFC 0026](0026-agent-identity-auth.md)), **organizational governance** ([RFC 0044](0044-governance-organizational-controls.md)), and **permissions** that are often scoped to a specific agent version ([RFC 0042](0042-permission-acl.md)). +Lifecycle and versioning intersect **identity** ([RFC 0026](0026-requester-identity-auth.md)), **organizational governance** ([RFC 0044](0044-governance-organizational-controls.md)), and **permissions** that are often scoped to a specific cognitive pipeline version ([RFC 0042](0042-permission-acl.md)). ## 2. Motivation -Operators routinely ship prompt and tool updates weekly, yet auditors require proof of **which binary/configuration** executed a given run. Without explicit lifecycle metadata, “the planner agent” is an ambiguous moving target: ACLs may grant access to a name that no longer matches behavior. Clear **`lifecycle_state`** gates which versions may receive traffic, while **`version_transition`** records who approved a rollout and whether emergency rollback is allowed. +Operators routinely ship prompt and tool updates weekly, yet auditors require proof of **which binary/configuration** executed a given run. 
Without explicit lifecycle metadata, “the planner cognitive pipeline” is an ambiguous moving target: ACLs may grant access to a name that no longer matches behavior. Clear **`lifecycle_state`** gates which versions may receive traffic, while **`version_transition`** records who approved a rollout and whether emergency rollback is allowed. This RFC does not define CI/CD mechanics, container image formats, or canary metrics collection; it specifies **authoritative records** that control planes and observability systems can store and query. @@ -18,7 +18,7 @@ This RFC does not define CI/CD mechanics, container image formats, or canary met ### 3.1 `agent_lifecycle` -**`agent_id`** is the stable logical identity ([RFC 0026](0026-agent-identity-auth.md)); **`version`** follows semantic versioning for human expectations but MUST be treated as an opaque string for matching grants. **`lifecycle_state`** values: +**`requester_id`** is the stable logical identity ([RFC 0026](0026-requester-identity-auth.md)); **`version`** follows semantic versioning for human expectations but MUST be treated as an opaque string for matching grants. **`lifecycle_state`** values: | State | Meaning | |-------|---------| @@ -28,7 +28,7 @@ This RFC does not define CI/CD mechanics, container image formats, or canary met | `deprecated` | Still runnable for compatibility but SHOULD not start new long-lived sessions; migrations encouraged. | | `retired` | MUST NOT schedule new work; historical traces remain addressable. | -**`created_at`** / **`observed_at`** are RFC 3339 timestamps. **`configuration_hash`** hashes the canonical serialized bundle (system prompt, tool allow list, model route, feature flags) so two hosts can verify they run identical configs. **`capabilities[]`** mirrors outward-facing skills for routing ([RFC 0021](0021-agent-capability-declaration.md) may elaborate). **`policy_refs[]`** lists attached policy documents or snapshots ([RFC 0041](0041-policy-enforcement-schema.md)). 
**`governance_ref`** points to organizational controls ([RFC 0044](0044-governance-organizational-controls.md))—team ownership, data classes, approval workflow ids. +**`created_at`** / **`observed_at`** are RFC 3339 timestamps. **`configuration_hash`** hashes the canonical serialized bundle (system prompt, tool allow list, model route, feature flags) so two hosts can verify they run identical configs. **`capabilities[]`** mirrors outward-facing skills for routing ([RFC 0021](0021-capability-declaration.md) may elaborate). **`policy_refs[]`** lists attached policy documents or snapshots ([RFC 0041](0041-policy-enforcement-schema.md)). **`governance_ref`** points to organizational controls ([RFC 0044](0044-governance-organizational-controls.md))—team ownership, data classes, approval workflow ids. ### 3.2 `version_transition` @@ -44,14 +44,14 @@ Permission grants ([RFC 0042](0042-permission-acl.md)) SHOULD include optional ` ```json { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://opencot.dev/schema/agent-lifecycle/v0.2", - "title": "Open CoT RFC 0030 — Agent Lifecycle", + "$id": "https://opencot.dev/schema/runtime-lifecycle/v0.2", + "title": "Open CoT RFC 0030 — Cognitive pipeline Lifecycle", "definitions": { "agent_lifecycle": { "type": "object", "additionalProperties": false, "required": [ - "agent_id", + "requester_id", "version", "lifecycle_state", "created_at", @@ -62,7 +62,7 @@ Permission grants ([RFC 0042](0042-permission-acl.md)) SHOULD include optional ` "governance_ref" ], "properties": { - "agent_id": { "type": "string", "minLength": 1 }, + "requester_id": { "type": "string", "minLength": 1 }, "version": { "type": "string", "minLength": 1 }, "lifecycle_state": { "type": "string", @@ -118,11 +118,11 @@ Permission grants ([RFC 0042](0042-permission-acl.md)) SHOULD include optional ` ## 5. 
Examples -### 5.1 Agent lifecycle record +### 5.1 Cognitive pipeline lifecycle record ```json { - "agent_id": "com.opencot.support.triage", + "requester_id": "com.opencot.support.triage", "version": "3.6.0", "lifecycle_state": "active", "created_at": "2026-03-01T09:00:00Z", @@ -155,11 +155,11 @@ Permission grants ([RFC 0042](0042-permission-acl.md)) SHOULD include optional ` | RFC | Title | Relationship | |-----|--------|----------------| -| [RFC 0026](0026-agent-identity-auth.md) | Agent Identity | Stable `agent_id` and authentication for lifecycle APIs. | +| [RFC 0026](0026-requester-identity-auth.md) | Cognitive pipeline Identity | Stable `requester_id` and authentication for lifecycle APIs. | | [RFC 0044](0044-governance-organizational-controls.md) | Governance | `governance_ref` and approval workflows for transitions. | -| [RFC 0042](0042-permission-acl.md) | Permissions & ACL | Grants may be pinned to `agent_id` + `version`. | +| [RFC 0042](0042-permission-acl.md) | Permissions & ACL | Grants may be pinned to `requester_id` + `version`. | | [RFC 0041](0041-policy-enforcement-schema.md) | Policy Enforcement | `policy_refs` attach evaluation snapshots to versions. | -| [RFC 0021](0021-agent-capability-declaration.md) | Capability Declaration | `capabilities` alignment for discovery. | +| [RFC 0021](0021-capability-declaration.md) | Capability Declaration | `capabilities` alignment for discovery. | ## 7. Open Questions Resolution @@ -172,6 +172,6 @@ Permission grants ([RFC 0042](0042-permission-acl.md)) SHOULD include optional ` ## 8. Acceptance Criteria 1. Every stored **`agent_lifecycle`** record validates against the schema and includes `configuration_hash`, `policy_refs`, and `governance_ref`. -2. Runs log the **`agent_id`** and **`version`** pair actually executed, enabling ACL checks per [RFC 0042](0042-permission-acl.md). +2. 
Runs log the **`requester_id`** and **`version`** pair actually executed, enabling ACL checks per [RFC 0042](0042-permission-acl.md). 3. **`version_transition`** records exist for production-impacting changes when [RFC 0044](0044-governance-organizational-controls.md) mandates approvals, including `migration_strategy` and `rollback_allowed`. 4. **`retired`** versions cannot be selected for new assignments by conforming schedulers without an explicit out-of-band exception flag (documented locally, not part of this schema). diff --git a/rfcs/0031-agent-observability-telemtry.md b/rfcs/0031-cognitive-observability-telemetry.md similarity index 81% rename from rfcs/0031-agent-observability-telemtry.md rename to rfcs/0031-cognitive-observability-telemetry.md index 3fe4729..b78eecf 100644 --- a/rfcs/0031-agent-observability-telemtry.md +++ b/rfcs/0031-cognitive-observability-telemetry.md @@ -1,4 +1,4 @@ -# RFC 0031 — Agent Observability & Telemetry (v0.2) +# RFC 0031 — Cognitive pipeline Observability & Telemetry (v0.2) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026-04-14 @@ -8,12 +8,12 @@ ## 1. Summary -This RFC defines telemetry and observability fields for reasoning agents. +This RFC defines telemetry and observability fields for reasoning pipelines. 
It extends: -- RFC 0007 — Agent Loop Protocol -- RFC 0022 — Agent Evaluation Protocol +- RFC 0007 — Cognitive Pipeline Protocol +- RFC 0022 — Cognitive pipeline Evaluation Protocol --- @@ -32,11 +32,11 @@ It extends: ```json { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Open CoT RFC 0031 — Agent Observability and Telemetry", + "title": "Open CoT RFC 0031 — Cognitive pipeline Observability and Telemetry", "type": "object", "properties": { "version": { "type": "string", "enum": ["0.2"] }, - "agent_id": { "type": "string" }, + "requester_id": { "type": "string" }, "observed_at": { "type": "string", "format": "date-time" }, "ordering": { "type": "object", @@ -59,7 +59,7 @@ It extends: "additionalProperties": true } }, - "required": ["version", "agent_id", "observed_at", "ordering", "metrics"] + "required": ["version", "requester_id", "observed_at", "ordering", "metrics"] } ``` @@ -70,7 +70,7 @@ It extends: ```json { "version": "0.2", - "agent_id": "planner", + "requester_id": "planner", "observed_at": "2026-04-14T11:30:00Z", "ordering": { "event_seq": 1042 diff --git a/rfcs/0032-agent-deployment-manifest.md b/rfcs/0032-runtime-deployment-manifest.md similarity index 77% rename from rfcs/0032-agent-deployment-manifest.md rename to rfcs/0032-runtime-deployment-manifest.md index c20b1d2..ba41fa3 100644 --- a/rfcs/0032-agent-deployment-manifest.md +++ b/rfcs/0032-runtime-deployment-manifest.md @@ -1,10 +1,10 @@ -# RFC 0032 — Agent Deployment Manifest, Status: Draft, Author: Open CoT Community, Created: 2026-04-14 +# RFC 0032 — Cognitive pipeline Deployment Manifest, Status: Draft, Author: Open CoT Community, Created: 2026-04-14 **Discussion:** https://github.com/supernovae/open-cot/discussions/32 ## 1. 
Summary -This RFC defines the **`deployment_manifest`**: a declarative document for running Open-CoT–governed agents with fixed runtime, resources, policy bindings, governance reference, sandbox ([RFC 0017](0017-agent-safety-sandboxing.md)), default budgets ([RFC 0038](0038-cost-aware-reasoning-budget.md)), tools, dependencies, health checks, and scaling. It is **infrastructure-as-code** for governed agents so rollouts are reproducible and provably aligned with org policy before the FSM ([RFC 0007](0007-agent-loop-protocol.md)) starts. +This RFC defines the **`deployment_manifest`**: a declarative document for running Open-CoT–governed pipelines with fixed runtime, resources, policy bindings, governance reference, sandbox ([RFC 0017](0017-runtime-safety-sandboxing.md)), default budgets ([RFC 0038](0038-cost-aware-reasoning-budget.md)), tools, dependencies, health checks, and scaling. It is **infrastructure-as-code** for governed pipelines so rollouts are reproducible and provably aligned with org policy before the FSM ([RFC 0007](0007-cognitive-pipeline-protocol.md)) starts. ## 2. Motivation @@ -16,7 +16,7 @@ Scattered Helm values and shell exports hide which policies, sandboxes, and budg | Field | Role | |-------|------| -| `manifest_id` | Stable id for this manifest revision (≠ `agent_id`). | +| `manifest_id` | Stable id for this manifest revision (≠ `requester_id`). | | `runtime` | OCI `container_image`, optional `entrypoint`, `environment` (no secrets). | | `resources` | `cpu`, `memory`, `gpu` (gpu MAY be fractional). | | `policies[]` | Ordered policy bundle ids ([RFC 0041](0041-policy-enforcement-schema.md)). | @@ -24,7 +24,7 @@ Scattered Helm values and shell exports hide which policies, sandboxes, and budg | `sandbox_config` | RFC 0017 object; MUST NOT widen beyond governance. | | `budget_defaults` | RFC 0038 defaults; per-run overrides if policy allows. | | `tool_allowlist[]` | Deployed tools; intersected with sandbox + policy. 
| -| `dependencies[]` | Agents, datasets, tool packs, models. | +| `dependencies[]` | Pipelines, datasets, tool packs, models. | | `health_check` | Probe (`http` \| `tcp` \| `exec` \| `grpc`); orchestrator may extend. | | `scaling` | `min_replicas`, `max_replicas`. | @@ -35,7 +35,7 @@ Scattered Helm values and shell exports hide which policies, sandboxes, and budg { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://opencot.dev/schema/rfc0032/deployment-manifest.json", - "title": "Open CoT RFC 0032 — Agent Deployment Manifest", + "title": "Open CoT RFC 0032 — Cognitive pipeline Deployment Manifest", "type": "object", "additionalProperties": false, "$defs": { @@ -86,7 +86,7 @@ Scattered Helm values and shell exports hide which policies, sandboxes, and budg "additionalProperties": false, "properties": { "ref": { "type": "string", "minLength": 1 }, - "kind": { "type": "string", "enum": ["agent", "dataset", "tool_pack", "model", "other"] }, + "kind": { "type": "string", "enum": ["pipeline", "dataset", "tool_pack", "model", "other"] }, "version_constraint": { "type": "string" } }, "required": ["ref", "kind"] @@ -97,7 +97,7 @@ Scattered Helm values and shell exports hide which policies, sandboxes, and budg "properties": { "schema_version": { "type": "string", "enum": ["0.1"] }, "manifest_id": { "type": "string", "minLength": 1 }, - "agent_id": { "type": "string", "minLength": 1 }, + "requester_id": { "type": "string", "minLength": 1 }, "version": { "type": "string", "minLength": 1 }, "runtime": { "$ref": "#/$defs/runtime" }, "resources": { "$ref": "#/$defs/resources" }, @@ -111,7 +111,7 @@ Scattered Helm values and shell exports hide which policies, sandboxes, and budg "scaling": { "$ref": "#/$defs/scaling" }, "metadata": { "type": "object", "additionalProperties": { "type": "string" } } }, - "required": ["schema_version", "manifest_id", "agent_id", "version", "runtime", "policies", "governance_ref", "sandbox_config", "budget_defaults", 
"tool_allowlist", "scaling"] + "required": ["schema_version", "manifest_id", "requester_id", "version", "runtime", "policies", "governance_ref", "sandbox_config", "budget_defaults", "tool_allowlist", "scaling"] } }, "properties": { "deployment_manifest": { "$ref": "#/$defs/deployment_manifest" } }, @@ -122,23 +122,23 @@ Scattered Helm values and shell exports hide which policies, sandboxes, and budg ## 5. Examples -**Code-assistant agent** — strict sandbox, org governance, hard budget, repo tools, GPU for embeddings. +**Code-assistant cognitive pipeline** — strict sandbox, org governance, hard budget, repo tools, GPU for embeddings. ```json { "deployment_manifest": { "schema_version": "0.1", "manifest_id": "dm_acme_codeassist_2026q2_14", - "agent_id": "agent:org/acme/code-assistant", + "requester_id": "cognitive-pipeline:org/acme/code-assistant", "version": "2.4.1", "runtime": { - "container_image": "registry.acme.example/agents/code-assistant:2.4.1", - "entrypoint": ["/opt/opencot/bin/agentd", "--config", "/etc/opencot/agent.yaml"], + "container_image": "registry.acme.example/pipelines/code-assistant:2.4.1", + "entrypoint": ["/opt/opencot/bin/opencotd", "--config", "/etc/opencot/cognitive-pipeline.yaml"], "environment": { "OPENCOT_LOG_LEVEL": "info" } }, "resources": { "cpu": "4", "memory": "16Gi", "gpu": 1 }, "policies": ["policy_bundle:acme/base", "policy_bundle:acme/code_assistant_prod"], - "governance_ref": "gov://acme/prod/eu-west/code_agents", + "governance_ref": "gov://acme/prod/eu-west/code_pipelines", "sandbox_config": { "allowed_tools": ["repo.read", "repo.search", "linter.run", "tests.run", "patch.propose"], "blocked_tools": ["shell", "network_raw", "secrets.read"], @@ -153,14 +153,14 @@ Scattered Helm values and shell exports hide which policies, sandboxes, and budg "dependencies": [{ "ref": "dataset:acme/styleguide-embed", "kind": "dataset", "version_constraint": "^3" }], "health_check": { "kind": "http", "path": "/healthz", "port": 8080, 
"interval_seconds": 10, "timeout_seconds": 2, "success_threshold": 1, "failure_threshold": 3 }, "scaling": { "min_replicas": 2, "max_replicas": 20 }, - "metadata": { "team": "platform-agents", "region": "eu-west-1" } + "metadata": { "team": "platform-pipelines", "region": "eu-west-1" } } } ``` ## 6. Cross-references -[RFC 0007](0007-agent-loop-protocol.md) · [RFC 0017](0017-agent-safety-sandboxing.md) · [RFC 0038](0038-cost-aware-reasoning-budget.md) · [RFC 0041](0041-policy-enforcement-schema.md) · [RFC 0044](0044-governance-organizational-controls.md) +[RFC 0007](0007-cognitive-pipeline-protocol.md) · [RFC 0017](0017-runtime-safety-sandboxing.md) · [RFC 0038](0038-cost-aware-reasoning-budget.md) · [RFC 0041](0041-policy-enforcement-schema.md) · [RFC 0044](0044-governance-organizational-controls.md) ## 7. Open Questions Resolution diff --git a/rfcs/0033-agent-security-threat-model.md b/rfcs/0033-runtime-security-threat-model.md similarity index 80% rename from rfcs/0033-agent-security-threat-model.md rename to rfcs/0033-runtime-security-threat-model.md index 5a164b3..3dce76c 100644 --- a/rfcs/0033-agent-security-threat-model.md +++ b/rfcs/0033-runtime-security-threat-model.md @@ -1,10 +1,10 @@ -# RFC 0033 — Agent Security & Threat Model, Status: Draft, Author: Open CoT Community, Created: 2026-04-14 +# RFC 0033 — Cognitive pipeline Security & Threat Model, Status: Draft, Author: Open CoT Community, Created: 2026-04-14 **Discussion:** https://github.com/supernovae/open-cot/discussions/33 ## 1. Summary -This RFC documents the **security threat landscape** for governed agents on the Open-CoT cognitive control plane and explains how architectural choices—especially the separation of **proposal** (model) from **authorization** (harness, policy, brokers)—reduce impact for each major threat class. 
It introduces a machine-readable **`threat_catalog`**: a versioned collection of **`threat_entry`** records linking each threat to severities, mitigations (by RFC or mechanism), and honest **residual risk** statements for security reviewers and auditors. +This RFC documents the **security threat landscape** for governed pipelines on the Open-CoT cognitive control plane and explains how architectural choices—especially the separation of **proposal** (model) from **authorization** (harness, policy, brokers)—reduce impact for each major threat class. It introduces a machine-readable **`threat_catalog`**: a versioned collection of **`threat_entry`** records linking each threat to severities, mitigations (by RFC or mechanism), and honest **residual risk** statements for security reviewers and auditors. ## 2. Motivation @@ -21,7 +21,7 @@ We protect **organizational data**, **downstream systems reachable by tools**, * | Boundary | Trusts | Does not trust | |----------|--------|----------------| | Model | Capability to suggest plans and text. | Self-judgment of legality, scope, or safety; any narrative claiming urgency or override. | -| Harness | Verified identity ([RFC 0026](0026-agent-identity-auth.md)), trace binding, schema validation, FSM transitions ([RFC 0007](0007-agent-loop-protocol.md)). | Raw model JSON without normalization and size limits. | +| Harness | Verified identity ([RFC 0026](0026-requester-identity-auth.md)), trace binding, schema validation, FSM transitions ([RFC 0007](0007-cognitive-pipeline-protocol.md)). | Raw model JSON without normalization and size limits. | | Policy engine | Rule evaluation, obligation logs ([RFC 0041](0041-policy-enforcement-schema.md)). | Model-authored “policy” fields. | | Permissions / delegation | Stored grants, audience-bound receipts ([RFC 0042](0042-permission-acl.md), [RFC 0047](0047-delegation-extension.md)). | Tool-selected scope expansion. 
| | Tools & hosts | Correct implementation when inputs stay within granted scope. | Arbitrary URLs or paths offered by the model without validation. | @@ -29,7 +29,7 @@ We protect **organizational data**, **downstream systems reachable by tools**, * ### 3.3 How the governed FSM mitigates key threats -**Prompt injection** — Outputs are proposals; FSM blocks tools without authority ([RFC 0007](0007-agent-loop-protocol.md), [RFC 0047](0047-delegation-extension.md)). **Privilege escalation** — Model cannot author `delegation_decision` / `authority_receipt`. **Confused deputy** — Audience-bound scopes/receipts ([RFC 0042](0042-permission-acl.md), [RFC 0047](0047-delegation-extension.md)). **Data exfiltration** — Narrowing + sandbox allowlists ([RFC 0017](0017-agent-safety-sandboxing.md)). **Replay** — Hashed/signed receipts ([RFC 0048](0048-execution-receipts-audit-envelopes.md)). +**Prompt injection** — Outputs are proposals; FSM blocks tools without authority ([RFC 0007](0007-cognitive-pipeline-protocol.md), [RFC 0047](0047-delegation-extension.md)). **Privilege escalation** — Model cannot author `delegation_decision` / `authority_receipt`. **Confused deputy** — Audience-bound scopes/receipts ([RFC 0042](0042-permission-acl.md), [RFC 0047](0047-delegation-extension.md)). **Data exfiltration** — Narrowing + sandbox allowlists ([RFC 0017](0017-runtime-safety-sandboxing.md)). **Replay** — Hashed/signed receipts ([RFC 0048](0048-execution-receipts-audit-envelopes.md)). 
### 3.4 Threat catalog record (`threat_entry`) @@ -111,8 +111,8 @@ Each entry uses a stable **`threat_id`**, a **`category`** enum aligned with com { "threat_catalog": { "schema_version": "0.1", - "catalog_id": "tc_acme_agents_core_2026q2", - "title": "ACME governed agents — baseline threats", + "catalog_id": "tc_acme_pipelines_core_2026q2", + "title": "ACME governed pipelines — baseline threats", "entries": [ { "threat_id": "THR-PROMPT-INJECT-001", @@ -164,7 +164,7 @@ Each entry uses a stable **`threat_id`**, a **`category`** enum aligned with com ## 6. Cross-references -[RFC 0007](0007-agent-loop-protocol.md) · [RFC 0017](0017-agent-safety-sandboxing.md) · [RFC 0026](0026-agent-identity-auth.md) · [RFC 0041](0041-policy-enforcement-schema.md) · [RFC 0042](0042-permission-acl.md) · [RFC 0044](0044-governance-organizational-controls.md) · [RFC 0047](0047-delegation-extension.md) · [RFC 0048](0048-execution-receipts-audit-envelopes.md) +[RFC 0007](0007-cognitive-pipeline-protocol.md) · [RFC 0017](0017-runtime-safety-sandboxing.md) · [RFC 0026](0026-requester-identity-auth.md) · [RFC 0041](0041-policy-enforcement-schema.md) · [RFC 0042](0042-permission-acl.md) · [RFC 0044](0044-governance-organizational-controls.md) · [RFC 0047](0047-delegation-extension.md) · [RFC 0048](0048-execution-receipts-audit-envelopes.md) ## 7. 
Open Questions Resolution diff --git a/rfcs/0034-agent-federation-protocol.md b/rfcs/0034-cognitive-federation-protocol.md similarity index 72% rename from rfcs/0034-agent-federation-protocol.md rename to rfcs/0034-cognitive-federation-protocol.md index 10c39f9..b13bc8b 100644 --- a/rfcs/0034-agent-federation-protocol.md +++ b/rfcs/0034-cognitive-federation-protocol.md @@ -1,24 +1,24 @@ -# RFC 0034 — Agent Federation Protocol, Status: Draft, Author: Open CoT Community, Created: 2026-04-14 +# RFC 0034 — Cognitive pipeline Federation Protocol, Status: Draft, Author: Open CoT Community, Created: 2026-04-14 **Discussion:** https://github.com/supernovae/open-cot/discussions/34 ## 1. Summary -This RFC defines how **independent Open-CoT deployments** (“peers”) interoperate when agents must delegate work across organizational or network boundaries while **retaining local policy sovereignty**. Each peer runs its own policy engine and identity plane; federation adds a **trust framework** for verifying peers, constraining accepted delegation scopes, and exchanging **`federation_request`** / **`federation_response`** messages that embed the standard **`delegation_request`** and **`delegation_decision`** objects from [RFC 0047](0047-delegation-extension.md). The result is cross-tenant collaboration without a single global “god” policy service—only negotiated trust and cryptographic verification. +This RFC defines how **independent Open-CoT deployments** (“peers”) interoperate when pipelines must delegate work across organizational or network boundaries while **retaining local policy sovereignty**. Each peer runs its own policy engine and identity plane; federation adds a **trust framework** for verifying peers, constraining accepted delegation scopes, and exchanging **`federation_request`** / **`federation_response`** messages that embed the standard **`delegation_request`** and **`delegation_decision`** objects from [RFC 0047](0047-delegation-extension.md). 
The result is cross-tenant collaboration without a single global “god” policy service—only negotiated trust and cryptographic verification. ## 2. Motivation -Multi-cluster and multi-company agent workflows are inevitable (support handoffs, joint research, supply-chain automation). Naïvely forwarding API keys or model prompts between parties collapses auditability and explodes confused-deputy risk. Federation needs: **stable peer identities**, **graded trust levels**, **scope caps**, **TTL limits**, **delegation path transparency** (`trust_chain[]`), and **tamper-evident responses** so downstream executors can prove which peer authorized what. +Multi-cluster and multi-company cognitive pipeline workflows are inevitable (support handoffs, joint research, supply-chain automation). Naïvely forwarding API keys or model prompts between parties collapses auditability and explodes confused-deputy risk. Federation needs: **stable peer identities**, **graded trust levels**, **scope caps**, **TTL limits**, **delegation path transparency** (`trust_chain[]`), and **tamper-evident responses** so downstream executors can prove which peer authorized what. ## 3. Design -**Roles:** Source peer signs and sends `federation_request` for hosted agents ([RFC 0026](0026-agent-identity-auth.md)). Target peer evaluates locally and maps foreign scopes—never inherits source policy verbatim. Optional trust coordinator tracks `last_verified_at` / key rotation. +**Roles:** Source peer signs and sends `federation_request` for hosted pipelines ([RFC 0026](0026-requester-identity-auth.md)). Target peer evaluates locally and maps foreign scopes—never inherits source policy verbatim. Optional trust coordinator tracks `last_verified_at` / key rotation. **Trust levels:** `untrusted` (crypto id only; often needs human gate), `verified` (due diligence + contract), `trusted` (automation)—all still cap TTL and intersect `accepted_scopes[]`. **Flow:** (1) Harness builds `delegation_request` for remote work. 
(2) Gateway wraps it with `trust_chain` (start `[source_peer_id]`). (3) Target policy emits `federation_response` + `delegation_decision`. (4) Broker may mint `authority_receipt` ([RFC 0047](0047-delegation-extension.md)); execution receipts SHOULD cite federation + delegation ids ([RFC 0048](0048-execution-receipts-audit-envelopes.md)). (5) Multi-hop appends peers; each hop re-signs. -**Vs. [RFC 0027](0027-distributed-agent-execution-protocol.md):** 0027 is routing/topology; this RFC is the cross-admin trust and delegation envelope (orthogonal transport headers). +**Vs. [RFC 0027](0027-distributed-execution-protocol.md):** 0027 is routing/topology; this RFC is the cross-admin trust and delegation envelope (orthogonal transport headers). ## 4. JSON Schema @@ -27,7 +27,7 @@ Multi-cluster and multi-company agent workflows are inevitable (support handoffs { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://opencot.dev/schema/rfc0034/federation.json", - "title": "Open CoT RFC 0034 — Agent Federation Protocol", + "title": "Open CoT RFC 0034 — Cognitive pipeline Federation Protocol", "type": "object", "additionalProperties": false, "$defs": { @@ -117,7 +117,7 @@ Multi-cluster and multi-company agent workflows are inevitable (support handoffs "delegation_request": { "schema_version": "0.1", "request_id": "dr_sensor_pull_441a", - "requester": "agent:org/lab/planner-alpha", + "requester": "cognitive-pipeline:org/lab/planner-alpha", "run_id": "run_20260418_0930", "timestamp": "2026-04-18T09:30:00Z", "intent": "Fetch last 24h of air quality samples for site bundle S-12", @@ -140,7 +140,7 @@ Target responds with `status` ∈ {`accepted`,`narrowed`} (or `rejected`), `dele ## 6. 
Cross-references -[RFC 0026](0026-agent-identity-auth.md) · [RFC 0027](0027-distributed-agent-execution-protocol.md) · [RFC 0047](0047-delegation-extension.md) · [RFC 0048](0048-execution-receipts-audit-envelopes.md) +[RFC 0026](0026-requester-identity-auth.md) · [RFC 0027](0027-distributed-execution-protocol.md) · [RFC 0047](0047-delegation-extension.md) · [RFC 0048](0048-execution-receipts-audit-envelopes.md) ## 7. Open Questions Resolution @@ -152,4 +152,4 @@ Target responds with `status` ∈ {`accepted`,`narrowed`} (or `rejected`), `dele ## 8. Acceptance Criteria -`delegation_request.requester` MUST resolve in the source peer registry ([RFC 0026](0026-agent-identity-auth.md)). Target rejects TTL above `max_delegation_ttl_seconds` for source. `federation_response.delegation_decision.request_id` MUST equal nested `delegation_request.request_id`. `response_integrity.content_hash` MUST cover `request_id`, `status`, and canonical `delegation_decision`. +`delegation_request.requester` MUST resolve in the source peer registry ([RFC 0026](0026-requester-identity-auth.md)). Target rejects TTL above `max_delegation_ttl_seconds` for source. `federation_response.delegation_decision.request_id` MUST equal nested `delegation_request.request_id`. `response_integrity.content_hash` MUST cover `request_id`, `status`, and canonical `delegation_decision`. 
diff --git a/rfcs/0035-data-provenance-tracking.md b/rfcs/0035-data-provenance-tracking.md index 93e4710..b7037ac 100644 --- a/rfcs/0035-data-provenance-tracking.md +++ b/rfcs/0035-data-provenance-tracking.md @@ -12,9 +12,9 @@ This RFC defines provenance and integrity metadata for Open CoT artifacts, inclu It extends: -- RFC 0010 — Agent Memory Schema +- RFC 0010 — Cognitive pipeline Memory Schema - RFC 0020 — Verifiable Scratchpad Compression -- RFC 0022 — Agent Evaluation Protocol +- RFC 0022 — Cognitive pipeline Evaluation Protocol --- @@ -24,7 +24,7 @@ Required provenance dimensions: - source identity - transformation chain -- actor (agent/tool) identity +- actor (cognitive-pipeline/tool) identity - timestamp and pipeline stage Integrity additions: @@ -47,7 +47,7 @@ Integrity additions: "artifact_id": { "type": "string" }, "artifact_type": { "type": "string" }, "source": { "type": "string" }, - "agent_id": { "type": "string" }, + "requester_id": { "type": "string" }, "tool_id": { "type": "string" }, "timestamp": { "type": "string", "format": "date-time" }, "transformation": { "type": "string" }, @@ -78,7 +78,7 @@ Integrity additions: "artifact_id": "trace_001", "artifact_type": "reasoning_trace", "source": "synthetic_seed_v0", - "agent_id": "planner", + "requester_id": "planner", "timestamp": "2026-04-14T12:00:00Z", "transformation": "converted_from_gsm8k_minimal", "parent_artifact_ids": ["raw_qa_001"], diff --git a/rfcs/0036-agent-native-compression-delta.md b/rfcs/0036-cognitive-native-compression-delta.md similarity index 71% rename from rfcs/0036-agent-native-compression-delta.md rename to rfcs/0036-cognitive-native-compression-delta.md index 004d6e0..c272bc2 100644 --- a/rfcs/0036-agent-native-compression-delta.md +++ b/rfcs/0036-cognitive-native-compression-delta.md @@ -1,4 +1,4 @@ -# RFC 0036 — Agent‑Native Compression & Delta Sync (v0.1) +# RFC 0036 — Cognitive pipeline‑Native Compression & Delta Sync (v0.1) **Status:** Draft **Author:** Byron / Open 
CoT Community **Created:** 2026‑04‑14 @@ -8,12 +8,12 @@ ## 1. Summary -This RFC defines **agent‑native compression and delta synchronization**, enabling: +This RFC defines **cognitive pipeline‑native compression and delta synchronization**, enabling: - efficient memory sync - distributed scratchpad updates - incremental state transfer -- low‑bandwidth multi‑agent collaboration +- low‑bandwidth multi‑party collaboration It extends: @@ -57,4 +57,4 @@ It extends: ## 5. Conclusion -This RFC defines efficient delta‑based synchronization for agent ecosystems. +This RFC defines efficient delta‑based synchronization for cognitive pipeline ecosystems. diff --git a/rfcs/0037-token-economy-cost-modeling.md b/rfcs/0037-token-economy-cost-modeling.md index 3a12213..4879daf 100644 --- a/rfcs/0037-token-economy-cost-modeling.md +++ b/rfcs/0037-token-economy-cost-modeling.md @@ -1,35 +1,35 @@ # RFC 0037 — Token Economy & Cost Modeling (v0.1) -**Status:** Draft -**Author:** Byron / Open CoT Community -**Created:** 2026‑04‑14 -**Target Version:** Schema v0.7 +**Status:** Draft +**Author:** Byron / Open CoT Community +**Created:** 2026‑04‑14 +**Target Version:** Schema v0.7 **Discussion:** https://github.com/supernovae/open-cot/discussions/37 --- ## 1. Summary -This RFC defines a **token economy and cost modeling standard** for reasoning agents, enabling: +This RFC defines a **token economy and cost modeling standard** for reasoning pipelines, enabling: -- token budgeting -- cost‑aware planning -- cost‑aware tool selection -- economic constraints on CoT expansion -- predictable inference costs +- token budgeting +- cost‑aware planning +- cost‑aware tool selection +- economic constraints on CoT expansion +- predictable inference costs It integrates with: -- RFC 0007 — Agent Loop Protocol -- RFC 0020 — Scratchpad Compression +- RFC 0007 — Cognitive Pipeline Protocol +- RFC 0020 — Scratchpad Compression --- ## 2.
Cost Components -- **model_cost** — tokens in/out -- **tool_cost** — per‑call cost -- **memory_cost** — read/write cost -- **branch_cost** — cost per branch -- **verification_cost** — verifier calls +- **model_cost** — tokens in/out +- **tool_cost** — per‑call cost +- **memory_cost** — read/write cost +- **branch_cost** — cost per branch +- **verification_cost** — verifier calls --- diff --git a/rfcs/0038-cost-aware-reasoning-budget.md b/rfcs/0038-cost-aware-reasoning-budget.md index 648cc11..d1eb218 100644 --- a/rfcs/0038-cost-aware-reasoning-budget.md +++ b/rfcs/0038-cost-aware-reasoning-budget.md @@ -8,7 +8,7 @@ ## 1. Summary -This RFC defines **cost‑aware reasoning**, enabling agents to: +This RFC defines **cost‑aware reasoning**, enabling pipelines to: - reason under token budgets - prune branches based on cost @@ -38,7 +38,7 @@ It extends: { "$schema": "http://json-schema.org/draft-07/schema#", "title": "Open CoT RFC 0038 — Cost-Aware Reasoning Budget", - "description": "Budget policy and snapshot types for enforcing token, cost, step, tool-call, and retry limits on agent loops.", + "description": "Budget policy and snapshot types for enforcing token, cost, step, tool-call, and retry limits on cognitive pipelines.", "type": "object", "properties": { "budget": { @@ -53,7 +53,7 @@ It extends: "max_cost": { "type": "number", "minimum": 0, - "description": "Maximum dollar cost for the entire agent run." + "description": "Maximum dollar cost for the entire cognitive pipeline run." }, "max_steps": { "type": "integer", @@ -76,7 +76,7 @@ It extends: "enforcement": { "type": "string", "enum": ["hard", "soft", "warn"], - "description": "How the budget is enforced. 'hard' force-stops the agent, 'soft' logs warnings, 'warn' emits telemetry only." + "description": "How the budget is enforced. 'hard' force-stops the cognitive pipeline, 'soft' logs warnings, 'warn' emits telemetry only." }, "snapshot": { "type": "object", @@ -127,4 +127,4 @@ It extends: ## 5. 
Conclusion -This RFC defines how agents reason within explicit economic constraints. +This RFC defines how pipelines reason within explicit economic constraints. diff --git a/rfcs/0039-tool-cost-modeling-biling.md b/rfcs/0039-tool-cost-modeling-biling.md index 4933fad..8fe11d1 100644 --- a/rfcs/0039-tool-cost-modeling-biling.md +++ b/rfcs/0039-tool-cost-modeling-biling.md @@ -1,8 +1,8 @@ # RFC 0039 — Tool Cost Modeling & Billing Semantics (v0.1) -**Status:** Draft -**Author:** Byron / Open CoT Community -**Created:** 2026‑04‑14 -**Target Version:** Schema v0.7 +**Status:** Draft +**Author:** Byron / Open CoT Community +**Created:** 2026‑04‑14 +**Target Version:** Schema v0.7 **Discussion:** https://github.com/supernovae/open-cot/discussions/39 --- @@ -10,26 +10,26 @@ This RFC defines **cost modeling for tools**, enabling: -- per‑call billing -- per‑token billing -- per‑operation billing -- cost negotiation -- cost‑aware tool selection +- per‑call billing +- per‑token billing +- per‑operation billing +- cost negotiation +- cost‑aware tool selection It extends: -- RFC 0003 — Tool Invocation Schema -- RFC 0025 — Tool Marketplace Registry +- RFC 0003 — Tool Invocation Schema +- RFC 0025 — Tool Marketplace Registry --- ## 2. 
Cost Models -- **flat** — fixed per call -- **per_token** — based on input/output size -- **tiered** — volume‑based -- **dynamic** — surge pricing -- **negotiated** — multi‑agent negotiation +- **flat** — fixed per call +- **per_token** — based on input/output size +- **tiered** — volume‑based +- **dynamic** — surge pricing +- **negotiated** — multi‑party negotiation --- diff --git a/rfcs/0040-multi-agent-economic-incentives.md b/rfcs/0040-multi-party-economic-incentives.md similarity index 69% rename from rfcs/0040-multi-agent-economic-incentives.md rename to rfcs/0040-multi-party-economic-incentives.md index d721fa6..88094b5 100644 --- a/rfcs/0040-multi-agent-economic-incentives.md +++ b/rfcs/0040-multi-party-economic-incentives.md @@ -1,4 +1,4 @@ -# RFC 0040 — Multi‑Agent Economic Incentives (v0.1) +# RFC 0040 — Multi‑Party Economic Incentives (v0.1) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026‑04‑14 @@ -8,7 +8,7 @@ ## 1. Summary -This RFC defines **economic incentive structures** for multi‑agent systems, enabling: +This RFC defines **economic incentive structures** for multi‑party systems, enabling: - cooperative incentives - competitive incentives @@ -18,8 +18,8 @@ This RFC defines **economic incentive structures** for multi‑agent systems, en It extends: -- RFC 0011 — Multi‑Agent Protocol -- RFC 0015 — Multi‑Agent Reward Sharing +- RFC 0011 — Multi‑Cognitive pipeline Protocol +- RFC 0015 — Multi‑Cognitive pipeline Reward Sharing --- @@ -60,4 +60,4 @@ It extends: ## 5. Conclusion -This RFC defines economic coordination for multi‑agent reasoning. +This RFC defines economic coordination for multi‑party reasoning. diff --git a/rfcs/0041-policy-enforcement-schema.md b/rfcs/0041-policy-enforcement-schema.md index 4d8380d..b829f29 100644 --- a/rfcs/0041-policy-enforcement-schema.md +++ b/rfcs/0041-policy-enforcement-schema.md @@ -10,7 +10,7 @@ ## 1.
Summary -This RFC defines the **Policy Enforcement Schema (v0.3)** for Open-CoT, a cognitive control plane for governed agent execution. The policy engine decides when a model may invoke tools, access data or memory, or perform other governed operations. Given a **delegation request** (RFC 0047) and active policies, it returns **`allow`**, **`deny`**, **`narrow`**, or **`require_approval`**, with optional `narrowing` constraints, denial reasons, or escalation targets. +This RFC defines the **Policy Enforcement Schema (v0.3)** for Open-CoT, a cognitive control plane for governed cognitive pipeline execution. The policy engine evaluates when a model may invoke tools, access data or memory, or perform other governed operations. Given a **delegation request** (RFC 0047) and active policies, it returns **`allow`**, **`deny`**, **`narrow`**, or **`require_approval`**, with optional `narrowing` constraints, denial reasons, or escalation targets. v0.3 preserves **`narrow`** and introduces canonical temporal naming from RFC 0051: policy validity bounds use `effective_at` / `expires_at`, condition windows use `validity_window`, and evaluation records use `decided_at`. It formalizes deterministic temporal validity semantics while retaining composable narrowing and policy priority behavior. @@ -26,9 +26,9 @@ Operators need typed policies, graduated responses (`narrow`, `require_approval` | RFC | Title | Relationship | |-----|--------|----------------| -| RFC 0007 | Agent Loop Protocol | Governed FSM: policy consulted in frame, plan, validate_authority, observe_result, critique_verify, finalize. | -| RFC 0017 | Agent Safety & Sandboxing | This RFC supersedes simple allow/block lists with structured rules, narrowing, and evaluation records. | -| RFC 0026 | Agent Identity & Authentication | **Subject** identities and roles for `subject` matching. 
| +| RFC 0007 | Cognitive Pipeline Protocol | Governed FSM: policy consulted in frame, plan, validate_authority, observe_result, critique_verify, finalize. | +| RFC 0017 | Cognitive pipeline Safety & Sandboxing | This RFC supersedes simple allow/block lists with structured rules, narrowing, and evaluation records. | +| RFC 0026 | Cognitive pipeline Identity & Authentication | **Subject** identities and roles for `subject` matching. | | RFC 0042 | Permissions & Access Control | **Consumes** policy decisions for grants. | | RFC 0047 | Delegation | Engine evaluates `delegation_request`; `request_id` links artifacts. | @@ -56,7 +56,7 @@ Rule `action` and result `decision` share: **`allow`** (grant as narrowed so far ## 6. Resources and subjects -Resources SHOULD use prefixes: `tool:` (RFC 0003), `data:`, `memory:` (RFC 0010). Matching SHOULD prefer exact over pattern, then **longest-prefix / most-specific** tie-break (documented per implementation). **`subject`**: agent id, role, or wildcard per RFC 0026; wildcard grammar MUST be documented by the engine. +Resources SHOULD use prefixes: `tool:` (RFC 0003), `data:`, `memory:` (RFC 0010). Matching SHOULD prefer exact over pattern, then **longest-prefix / most-specific** tie-break (documented per implementation). **`subject`**: cognitive pipeline id, role, or wildcard per RFC 0026; wildcard grammar MUST be documented by the engine. 
--- @@ -172,7 +172,7 @@ Engines MUST emit one object per evaluated `(request_id, policy_id)` or define a "type": "object", "additionalProperties": false, "properties": { - "agent_id": { "type": "string" }, + "requester_id": { "type": "string" }, "run_id": { "type": "string" }, "budget_snapshot": { "type": "object", "additionalProperties": true }, "risk_assessment": { "type": "string" } @@ -209,7 +209,7 @@ Engines MUST emit one object per evaluated `(request_id, policy_id)` or define a "version": "0.3", "policy_id": "safety_no_shell", "policy_type": "safety", - "description": "Block shell for autonomous runs.", + "description": "Block shell for unsupervised runs.", "priority": 10, "effective_at": "2026-04-14T00:00:00Z", "rules": [ @@ -286,7 +286,7 @@ Engines MUST emit one object per evaluated `(request_id, policy_id)` or define a }, "decided_at": "2026-04-18T12:34:56Z", "context": { - "agent_id": "agent/analyst-7", + "requester_id": "cognitive-pipeline/analyst-7", "run_id": "run_19c0", "budget_snapshot": { "currency": "USD", "remaining": 12.45 }, "risk_assessment": "medium" diff --git a/rfcs/0042-permission-acl.md b/rfcs/0042-permission-acl.md index 253f4d4..346f73e 100644 --- a/rfcs/0042-permission-acl.md +++ b/rfcs/0042-permission-acl.md @@ -1,9 +1,9 @@ # RFC 0042 — Permissions & Access Control (v0.3) -**Status:** Draft -**Author:** Byron / Open CoT Community -**Created:** 2026-04-14 -**Target Version:** Schema v0.7 +**Status:** Draft +**Author:** Byron / Open CoT Community +**Created:** 2026-04-14 +**Target Version:** Schema v0.7 **Discussion:** https://github.com/supernovae/open-cot/discussions/42 --- @@ -12,23 +12,23 @@ This RFC defines **capability-based permission grants** for Open CoT: typed, scoped, time-limited objects that materialize authority *after* the policy engine approves a delegation request. The harness creates grants; the tool executor validates and consumes them. The language model **requests** access; it **never** self-authorizes. 
-This specification extends **RFC 0026** (Agent Identity — `granted_to`) and **RFC 0041** (Policy Enforcement — issuance and narrowing). It aligns with **RFC 0007** (permissions at `execute_tool`, revocation in finalize), **RFC 0047** (Delegation — `request_ref` / `decision_ref`), and **RFC 0043** (audit mapping for lifecycle events). +This specification extends **RFC 0026** (Cognitive pipeline Identity — `granted_to`) and **RFC 0041** (Policy Enforcement — issuance and narrowing). It aligns with **RFC 0007** (permissions at `execute_tool`, revocation in finalize), **RFC 0047** (Delegation — `request_ref` / `decision_ref`), and **RFC 0043** (audit mapping for lifecycle events). --- ## 2. Context -Open CoT is a **cognitive control plane**: reasoning, tools, memory, and policy compose into inspectable runs. Permissions bridge **policy approval** and **side-effecting execution**. First-class grants are required so that (a) tool endpoints are not confused deputies for ambient authority, (b) auditors can reconstruct what was allowed, for whom, for how long, and under which policy lineage, and (c) sub-agents do not silently inherit parent capabilities. A **permission grant** is a durable record with a strict lifecycle—not a static role matrix embedded in agent config. +Open CoT is a **cognitive control plane**: reasoning, tools, memory, and policy compose into inspectable runs. Permissions bridge **policy approval** and **side-effecting execution**. First-class grants are required so that (a) tool endpoints are not confused deputies for ambient authority, (b) auditors can reconstruct what was allowed, for whom, for how long, and under which policy lineage, and (c) sub-pipelines do not silently inherit parent capabilities. A **permission grant** is a durable record with a strict lifecycle—not a static role matrix embedded in cognitive pipeline config. --- ## 3. Design principles -1. 
**No self-authorization.** `granted_by`, `policy_ref`, `decision_ref`, and narrowed `scope` MUST be harness/policy-populated; the model MUST NOT supply values treated as issuance authority. -2. **Deny by default.** No matching active grant for `audience` + `scope` ⇒ execution MUST fail closed. -3. **Least privilege in the grant.** Persisted `scope` is the **post-policy** narrowed scope, not the model’s raw intent. -4. **Explicit binding.** `audience` ties the capability to a specific tool/service key. -5. **Time-bounded.** Every grant has `effective_at`, `ttl_seconds`, and `expires_at`; expired grants are unusable (`expired`). +1. **No self-authorization.** `granted_by`, `policy_ref`, `decision_ref`, and narrowed `scope` MUST be harness/policy-populated; the model MUST NOT supply values treated as issuance authority. +2. **Deny by default.** No matching active grant for `audience` + `scope` ⇒ execution MUST fail closed. +3. **Least privilege in the grant.** Persisted `scope` is the **post-policy** narrowed scope, not the model’s raw intent. +4. **Explicit binding.** `audience` ties the capability to a specific tool/service key. +5. **Time-bounded.** Every grant has `effective_at`, `ttl_seconds`, and `expires_at`; expired grants are unusable (`expired`). 6. **Observable transitions.** Every lifecycle change MUST emit a structured audit event (§9). --- @@ -74,7 +74,7 @@ Every grant MUST have `effective_at`, `ttl_seconds` (integer >= 1), and `expires ## 9. Forwardability -**`forwardable`** defaults **`false`**; sub-agents MUST request their own grants. **`forwardable: true`** only via explicit policy; implementations SHOULD require an **authority_receipt** chain (RFC 0047) for attributable inheritance. +**`forwardable`** defaults **`false`**; sub-pipelines MUST request their own grants. **`forwardable: true`** only via explicit policy; implementations SHOULD require an **authority_receipt** chain (RFC 0047) for attributable inheritance. 
--- @@ -190,7 +190,7 @@ First committed `tool:filesystem` write ⇒ `consumed` + `permission_consumed`. ```json { "permission_id": "b2c3d4e5-f6a7-4b8c-9d0e-123456789abc", - "granted_to": "agent:researcher-prod-east", + "granted_to": "cognitive-pipeline:researcher-prod-east", "scope": { "resource": "tool:search", "action": "read", @@ -245,10 +245,10 @@ Executor MUST enforce headers-only regardless of model prompts. ## 14. Cross-references -- **RFC 0007 — Governed FSM** — consume in `execute_tool`; revoke in finalize. -- **RFC 0026 — Agent Identity** — `granted_to` binding. -- **RFC 0041 — Policy Enforcement** — decisions create grants; optional audience aliases. -- **RFC 0043 — Auditing** — canonical audit stream for §10. +- **RFC 0007 — Governed FSM** — consume in `execute_tool`; revoke in finalize. +- **RFC 0026 — Cognitive pipeline Identity** — `granted_to` binding. +- **RFC 0041 — Policy Enforcement** — decisions create grants; optional audience aliases. +- **RFC 0043 — Auditing** — canonical audit stream for §10. - **RFC 0047 — Delegation** — `request_ref` / `decision_ref`; authority receipts when `forwardable`. --- diff --git a/rfcs/0043-auditing-compliance-logs.md b/rfcs/0043-auditing-compliance-logs.md index 4e8896c..0091874 100644 --- a/rfcs/0043-auditing-compliance-logs.md +++ b/rfcs/0043-auditing-compliance-logs.md @@ -10,7 +10,7 @@ ## 1. Summary -Open CoT is a **cognitive control plane**: governed agents run under explicit policies, permissions, budgets, and traces. This RFC defines the **audit subsystem**, which emits **immutable, hash-chained evidence** of everything that happened during governed execution—supporting **forensics**, **compliance reporting**, and **tamper detection**. +Open CoT is a **cognitive control plane**: governed pipelines run under explicit policies, permissions, budgets, and traces. 
This RFC defines the **audit subsystem**, which emits **immutable, hash-chained evidence** of everything that happened during governed execution—supporting **forensics**, **compliance reporting**, and **tamper detection**. Audit extends **RFC 0041 (Policy Enforcement)** and **RFC 0031 (Observability & Telemetry)**. Telemetry optimizes operations and reliability; audit provides a **normative evidence trail** (authorization decisions, delegation, tool use, denials, budget outcomes) suitable for regulators, customers, and incident response. @@ -39,9 +39,9 @@ Two schema objects apply: **`audit_event`** (append-only chain links) and **`aud ## 5. Field Semantics (Concise) -**`audit_event`:** `event_id` (UUID), `run_id`, `agent_id`, `observed_at` (RFC 3339 UTC), `event_type`, `details`, `parent_event_id` (UUID or `null` for genesis), `ordering.event_seq` (monotonic sequence per run), and `integrity` (`hash_algorithm`, `content_hash`). +**`audit_event`:** `event_id` (UUID), `run_id`, `requester_id`, `observed_at` (RFC 3339 UTC), `event_type`, `details`, `parent_event_id` (UUID or `null` for genesis), `ordering.event_seq` (monotonic sequence per run), and `integrity` (`hash_algorithm`, `content_hash`). -**`audit_envelope`:** `envelope_id`, `run_id`, `agent_id` (primary), `started_at`, `completed_at`, `completion_status`, `trace_hash`, `event_chain_head`, `event_chain_tail`, `event_count`, `delegation_summary`, `permission_summary`, `budget_final` (RFC 0038 snapshot shape), `policy_violations[]`, `integrity` (hash required; `signature_algorithm` / `signature` optional). +**`audit_envelope`:** `envelope_id`, `run_id`, `requester_id` (primary), `started_at`, `completed_at`, `completion_status`, `trace_hash`, `event_chain_head`, `event_chain_tail`, `event_count`, `delegation_summary`, `permission_summary`, `budget_final` (RFC 0038 snapshot shape), `policy_violations[]`, `integrity` (hash required; `signature_algorithm` / `signature` optional). 
**`completion_status`:** `succeeded` | `failed` | `denied` | `budget_exhausted` | `external_stop` | `escalation_timeout` | `fail_safe`. @@ -76,7 +76,7 @@ Two schema objects apply: **`audit_event`** (append-only chain links) and **`aud "schema_version": { "type": "string", "enum": ["0.3"] }, "event_id": { "type": "string", "format": "uuid" }, "run_id": { "type": "string", "minLength": 1 }, - "agent_id": { "type": "string", "minLength": 1 }, + "requester_id": { "type": "string", "minLength": 1 }, "observed_at": { "type": "string", "format": "date-time" }, "event_type": { "type": "string", "enum": ["run_started","policy_evaluated","permission_granted","permission_consumed","permission_expired","permission_revoked","tool_executed","delegation_requested","delegation_decided","escalation_initiated","escalation_resolved","postcondition_violated","denial_recorded","budget_warning","budget_exhausted","run_completed","run_failed","trace_sealed"] }, "details": { "type": "object", "additionalProperties": true }, @@ -100,7 +100,7 @@ Two schema objects apply: **`audit_event`** (append-only chain links) and **`aud "required": ["hash_algorithm", "content_hash"] } }, - "required": ["schema_version","event_id","run_id","agent_id","observed_at","event_type","details","parent_event_id","ordering","integrity"] + "required": ["schema_version","event_id","run_id","requester_id","observed_at","event_type","details","parent_event_id","ordering","integrity"] } ``` @@ -118,7 +118,7 @@ Two schema objects apply: **`audit_event`** (append-only chain links) and **`aud "schema_version": { "type": "string", "enum": ["0.3"] }, "envelope_id": { "type": "string", "format": "uuid" }, "run_id": { "type": "string", "minLength": 1 }, - "agent_id": { "type": "string", "minLength": 1 }, + "requester_id": { "type": "string", "minLength": 1 }, "started_at": { "type": "string", "format": "date-time" }, "completed_at": { "type": "string", "format": "date-time" }, "completion_status": { "type": "string", 
"enum": ["succeeded","failed","denied","budget_exhausted","external_stop","escalation_timeout","fail_safe"] }, @@ -193,7 +193,7 @@ Two schema objects apply: **`audit_event`** (append-only chain links) and **`aud "required": ["hash_algorithm", "content_hash"] } }, - "required": ["schema_version","envelope_id","run_id","agent_id","started_at","completed_at","completion_status","trace_hash","event_chain_head","event_chain_tail","event_count","delegation_summary","permission_summary","budget_final","policy_violations","integrity"] + "required": ["schema_version","envelope_id","run_id","requester_id","started_at","completed_at","completion_status","trace_hash","event_chain_head","event_chain_tail","event_count","delegation_summary","permission_summary","budget_final","policy_violations","integrity"] } ``` @@ -205,21 +205,21 @@ Two schema objects apply: **`audit_event`** (append-only chain links) and **`aud Illustrative `content_hash`; verifiers recompute from canonical bytes with `integrity` removed. 
```json -{"schema_version":"0.3","event_id":"a1b2c3d4-e5f6-4789-a012-3456789abcde","run_id":"run_20260414T153012Z_planner_01","agent_id":"planner.primary","observed_at":"2026-04-14T15:30:18.421Z","event_type":"permission_granted","details":{"permission_id":"perm_search_readonly_01","scope":{"tools":["tool:web_search"],"resources":["urn:opencot:corp_kb:public"]},"ttl_seconds":900,"grantor":"policy_engine@v0.7","policy_binding":{"policy_id":"corp_safe_search","policy_version":"2026.04.1"}},"parent_event_id":"00000000-0000-4000-8000-000000000001","ordering":{"event_seq":3},"integrity":{"hash_algorithm":"sha256","content_hash":"7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069"}} +{"schema_version":"0.3","event_id":"a1b2c3d4-e5f6-4789-a012-3456789abcde","run_id":"run_20260414T153012Z_planner_01","requester_id":"planner.primary","observed_at":"2026-04-14T15:30:18.421Z","event_type":"permission_granted","details":{"permission_id":"perm_search_readonly_01","scope":{"tools":["tool:web_search"],"resources":["urn:opencot:corp_kb:public"]},"ttl_seconds":900,"grantor":"policy_engine@v0.7","policy_binding":{"policy_id":"corp_safe_search","policy_version":"2026.04.1"}},"parent_event_id":"00000000-0000-4000-8000-000000000001","ordering":{"event_seq":3},"integrity":{"hash_algorithm":"sha256","content_hash":"7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069"}} ``` ### 10.2 `audit_envelope` — delegation, tools, success ```json 
-{"schema_version":"0.3","envelope_id":"f47ac10b-58cc-4372-a567-0e02b2c3d479","run_id":"run_20260414T153012Z_planner_01","agent_id":"planner.primary","started_at":"2026-04-14T15:30:12.000Z","completed_at":"2026-04-14T15:31:02.883Z","completion_status":"succeeded","trace_hash":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","event_chain_head":"00000000-0000-4000-8000-000000000001","event_chain_tail":"99999999-9999-4999-8999-999999999999","event_count":14,"delegation_summary":{"total_requested":1,"total_granted":1,"total_denied":0,"total_narrowed":1},"permission_summary":{"total_granted":2,"total_consumed":2,"total_expired":0,"total_revoked":0},"budget_final":{"tokens_used":4120,"tokens_remaining":880,"cost_used":0.042,"cost_remaining":0.058,"steps_used":6,"steps_remaining":4,"tool_calls_used":3,"tool_calls_remaining":7,"retries_used":0,"retries_remaining":2},"policy_violations":[],"integrity":{"hash_algorithm":"sha256","content_hash":"2c624232cdd221699294d012d04dfb23f036edaedd441b52e063bd86ba4a3b74","signature_algorithm":"ed25519","signature":"BASE64_DETACHED_SIGNATURE_PLACEHOLDER"}} 
+{"schema_version":"0.3","envelope_id":"f47ac10b-58cc-4372-a567-0e02b2c3d479","run_id":"run_20260414T153012Z_planner_01","requester_id":"planner.primary","started_at":"2026-04-14T15:30:12.000Z","completed_at":"2026-04-14T15:31:02.883Z","completion_status":"succeeded","trace_hash":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","event_chain_head":"00000000-0000-4000-8000-000000000001","event_chain_tail":"99999999-9999-4999-8999-999999999999","event_count":14,"delegation_summary":{"total_requested":1,"total_granted":1,"total_denied":0,"total_narrowed":1},"permission_summary":{"total_granted":2,"total_consumed":2,"total_expired":0,"total_revoked":0},"budget_final":{"tokens_used":4120,"tokens_remaining":880,"cost_used":0.042,"cost_remaining":0.058,"steps_used":6,"steps_remaining":4,"tool_calls_used":3,"tool_calls_remaining":7,"retries_used":0,"retries_remaining":2},"policy_violations":[],"integrity":{"hash_algorithm":"sha256","content_hash":"2c624232cdd221699294d012d04dfb23f036edaedd441b52e063bd86ba4a3b74","signature_algorithm":"ed25519","signature":"BASE64_DETACHED_SIGNATURE_PLACEHOLDER"}} ``` ## 11. Cross-References | RFC | Document | Relevance | |-----|----------|-----------| -| RFC 0007 | [0007-agent-loop-protocol.md](0007-agent-loop-protocol.md) | Governed FSM; `audit_seal`, `trace_sealed`, receipt linkage. | -| RFC 0031 | [0031-agent-observability-telemtry.md](0031-agent-observability-telemtry.md) | Telemetry; audit extends with compliance-grade events. | +| RFC 0007 | [0007-cognitive-pipeline-protocol.md](0007-cognitive-pipeline-protocol.md) | Governed FSM; `audit_seal`, `trace_sealed`, receipt linkage. | +| RFC 0031 | [0031-cognitive-observability-telemetry.md](0031-cognitive-observability-telemetry.md) | Telemetry; audit extends with compliance-grade events. | | RFC 0035 | [0035-data-provenance-tracking.md](0035-data-provenance-tracking.md) | Provenance and integrity model alignment. 
| | RFC 0041 | [0041-policy-enforcement-schema.md](0041-policy-enforcement-schema.md) | Policy evaluations as audit events. | | RFC 0042 | [0042-permission-acl.md](0042-permission-acl.md) | Permission lifecycle in `permission_*` events. | @@ -234,7 +234,7 @@ Illustrative `content_hash`; verifiers recompute from canonical bytes with `inte | Chain link | `parent_event_id` → predecessor **`event_id`**; tamper evidence from per-event `content_hash` + envelope binding. | | Per-event signatures | Out of scope for v0.3; optional **envelope** signature only. | | Strict `details` typing | Deferred; `additionalProperties: true` until stable cross-vendor shapes exist. | -| Multi-agent | Each event carries its **`agent_id`**; envelope `agent_id` is the run’s primary agent. | +| Multi-pipeline runs | Each event carries its own **`requester_id`**; the envelope `requester_id` identifies the run’s primary cognitive pipeline. | | Clock skew | `observed_at` is writer clock; NTP recommended; ordering uses `event_seq` first. | ## 13. Acceptance Criteria diff --git a/rfcs/0044-governance-organizational-controls.md b/rfcs/0044-governance-organizational-controls.md index 0e93498..250a4bc 100644 --- a/rfcs/0044-governance-organizational-controls.md +++ b/rfcs/0044-governance-organizational-controls.md @@ -10,7 +10,7 @@ ## 1. Summary -Open CoT is a **cognitive control plane**. This RFC specifies **organizational governance**: how policies, permissions, and constraints cascade from platform defaults through organizations and teams to agents—the enterprise-readiness layer. Configs are hierarchical and **monotonic toward restriction** (children narrow, never broaden). Resolution walks the parent chain and merges so the strictest interpretation wins. This extends **RFC 0041** (policies) and **RFC 0042** (permissions); it binds them to scope and compliance metadata without redefining policy rules or ACL tuples. +Open CoT is a **cognitive control plane**.
This RFC specifies **organizational governance**: how policies, permissions, and constraints cascade from platform defaults through organizations and teams to pipelines—the enterprise-readiness layer. Configs are hierarchical and **monotonic toward restriction** (children narrow, never broaden). Resolution walks the parent chain and merges so the strictest interpretation wins. This extends **RFC 0041** (policies) and **RFC 0042** (permissions); it binds them to scope and compliance metadata without redefining policy rules or ACL tuples. --- @@ -22,15 +22,15 @@ Open CoT is a **cognitive control plane**. This RFC specifies **organizational g ## 3. Relationship to prior RFCs -RFC **0041** — `required_policies` hold **policy_id** values evaluated per 0041. RFC **0042** — `max_trust_level` caps trust; tools interact with grants. RFC **0026** — `scope_id` and parent links identify org, team, agent. RFC **0007** — governance SHOULD load during **receive** / pre-act. RFC **0045** — `compliance_requirements[].pii_policy` references a **constraint_id**. +RFC **0041** — `required_policies` hold **policy_id** values evaluated per 0041. RFC **0042** — `max_trust_level` caps trust; tools interact with grants. RFC **0026** — `scope_id` and parent links identify org, team, cognitive pipeline. RFC **0007** — governance SHOULD load during **receive** / pre-act. RFC **0045** — `compliance_requirements[].pii_policy` references a **constraint_id**. --- ## 4. Governance layers and inheritance -**Levels (wide → narrow):** `global` (platform defaults, e.g. block `shell` unless an approved exception path exists), `organization` (tenant posture, e.g. SOC2 + no DB writes), `team` (refinements, e.g. Engineering code tools under approval), `agent` (per-agent overrides; still bound by ancestors). +**Levels (wide → narrow):** `global` (platform defaults, e.g. block `shell` unless an approved exception path exists), `organization` (tenant posture, e.g. 
SOC2 + no DB writes), `team` (refinements, e.g. Engineering code tools under approval), `pipeline` (per-pipeline overrides; still bound by ancestors). -**Parent chain:** `agent` → `team` → `organization` → `global`. Only `global` has `parent_governance_id: null`. Each non-global record MUST point to one parent at the immediate ancestor level. +**Parent chain:** `pipeline` → `team` → `organization` → `global`. Only `global` has `parent_governance_id: null`. Each non-global record MUST point to one parent at the immediate ancestor level. **Narrowing (normative):** `restricted_tools` — effective blocklist is **union** along the chain. `allowed_tools_override` — **intersection** of non-empty allowlists; empty array at a layer adds no intersection; children MUST NOT allow tools blocked above. `required_policies` — **union**. `max_trust_level` — order `untrusted < low < medium < high` as permitted ceiling; effective ceiling is the **minimum** (strictest); child ceiling MUST NOT exceed parent. `approval_workflows` — child entries only **tighten** control; default merge is **conjunctive** (all applicable workflows satisfied). Violations MUST fail validation with deterministic errors. @@ -38,7 +38,7 @@ RFC **0041** — `required_policies` hold **policy_id** values evaluated per 004 ## 5. Policy resolution -For each governed request: (1) load the agent-scoped governance record; (2) walk `parent_governance_id` through team, org, global; (3) merge per §4; (4) materialize the effective policy set for RFC 0041/0042; (5) emit in deterministic order (e.g. global→…→agent) for audit. Implementations SHOULD cache by `(agent_id, governance revision tuple)` and invalidate on change. +For each governed request: (1) load the cognitive-pipeline-scoped governance record; (2) walk `parent_governance_id` through team, org, global; (3) merge per §4; (4) materialize the effective policy set for RFC 0041/0042; (5) emit in deterministic order (e.g.
global→…→cognitive pipeline) for audit. Implementations SHOULD cache by `(requester_id, governance revision tuple)` and invalidate on change. --- @@ -57,7 +57,7 @@ Field semantics: `governance_id` UUID for this document; `scope_level` / `scope_ "properties": { "version": { "type": "string", "enum": ["0.2"] }, "governance_id": { "type": "string", "format": "uuid" }, - "scope_level": { "type": "string", "enum": ["global", "organization", "team", "agent"] }, + "scope_level": { "type": "string", "enum": ["global", "organization", "team", "pipeline"] }, "scope_id": { "type": ["string", "null"] }, "parent_governance_id": { "type": ["string", "null"] }, "required_policies": { @@ -213,7 +213,7 @@ Inherits org parent `a1b2c3d4-…`; adds code/repo tools via allowlist (still ca ## 8. Cross-references -**RFC 0007** — Governed FSM; governance during receive state. **RFC 0041** — Policy enforcement; `required_policies`. **RFC 0042** — Permissions; trust capped by `max_trust_level`. **RFC 0026** — Agent identity; `scope_id`. **RFC 0045** — Ethics; `pii_policy` → **constraint_id**. +**RFC 0007** — Governed FSM; governance during receive state. **RFC 0041** — Policy enforcement; `required_policies`. **RFC 0042** — Permissions; trust capped by `max_trust_level`. **RFC 0026** — Cognitive pipeline identity; `scope_id`. **RFC 0045** — Ethics; `pii_policy` → **constraint_id**. 
--- diff --git a/rfcs/0045-ethical-risk-contraints-reasoning-agents.md b/rfcs/0045-ethics-risk-constraints-cognitive-runtimes.md similarity index 93% rename from rfcs/0045-ethical-risk-contraints-reasoning-agents.md rename to rfcs/0045-ethics-risk-constraints-cognitive-runtimes.md index a5f83a0..742059a 100644 --- a/rfcs/0045-ethical-risk-contraints-reasoning-agents.md +++ b/rfcs/0045-ethics-risk-constraints-cognitive-runtimes.md @@ -1,4 +1,4 @@ -# RFC 0045 — Ethical & Risk Constraints for Reasoning Agents (v0.1) +# RFC 0045 — Ethical & Risk Constraints for Reasoning Pipelines (v0.1) **Status:** Draft **Author:** Byron / Open CoT Community **Created:** 2026-04-14 @@ -8,7 +8,7 @@ ## 1. Summary -This RFC defines ethical and risk constraints for reasoning agents, with explicit privacy and safety controls for open reasoning traces. +This RFC defines ethical and risk constraints for reasoning pipelines, with explicit privacy and safety controls for open reasoning traces. It extends: diff --git a/rfcs/0047-delegation-extension.md b/rfcs/0047-delegation-extension.md index 480e33d..4888e40 100644 --- a/rfcs/0047-delegation-extension.md +++ b/rfcs/0047-delegation-extension.md @@ -1,24 +1,24 @@ # RFC 0047 — Delegation Extension (v0.2) -**Status:** Draft -**Author:** Byron / Open CoT Community -**Created:** 2026-04-18 -**Target Version:** Schema v0.8 -**Discussion:** https://github.com/supernovae/open-cot/discussions/47 +**Status:** Draft +**Author:** Byron / Open CoT Community +**Created:** 2026-04-18 +**Target Version:** Schema v0.8 +**Discussion:** https://github.com/supernovae/open-cot/discussions/47 --- ## 1. Summary -Open CoT is a **cognitive control plane**: the model proposes; the harness, policy engine, and authorized brokers **decide**. **Delegation** is the formal process by which a model requests authority to act, and the harness evaluates that request—granting, denying, narrowing, or escalating—before any side-effecting tool runs. 
+Open CoT is a **cognitive control plane**: the model proposes; the harness, policy engine, and authorized brokers **reconcile**. **Delegation** is the formal process by which a model requests authority to act, and the harness evaluates that request—granting, denying, narrowing, or escalating—before any side-effecting tool runs. The governing insight of this extension is strict and non-negotiable: **the model does not authorize itself.** Typed schema objects represent every step of the authority flow so traces are replayable, policies consultable, and tool dispatch provably bound to a grant chain. This RFC defines three JSON objects: -1. **`delegation_request`** — intent and scope proposed (in part) by the model; harness binds identity, run context, and provenance. -2. **`delegation_decision`** — harness/policy-only outcome linked to the request. +1. **`delegation_request`** — intent and scope proposed (in part) by the model; harness binds identity, run context, and provenance. +2. **`delegation_decision`** — harness/policy-only outcome linked to the request. 3. **`authority_receipt`** — tamper-evident grant artifact produced by an auth broker after approval, consumed at tool execution. -**Cross-references:** [RFC 0007 — Agent Loop / FSM](0007-agent-loop-protocol.md) (states `request_authority`, `validate_authority`, `delegate_narrow`, `execute_tool`); [RFC 0026 — Agent Identity](0026-agent-identity-auth.md) (`requester` MUST be a verified `agent_id`); [RFC 0041 — Policy](0041-policy-enforcement-schema.md) (rules consulted → `policy_refs`); [RFC 0042 — Permissions](0042-permission-acl.md) (`permission_id` references stored grants); [RFC 0048 — Execution receipts](0048-execution-receipts-audit-envelopes.md) (tool receipts SHOULD reference `authority_receipt` or standing grant). 
+**Cross-references:** [RFC 0007 — Cognitive Pipeline / FSM](0007-cognitive-pipeline-protocol.md) (states `request_authority`, `validate_authority`, `delegate_narrow`, `execute_tool`); [RFC 0026 — Cognitive pipeline Identity](0026-requester-identity-auth.md) (`requester` MUST be a verified `requester_id`); [RFC 0041 — Policy](0041-policy-enforcement-schema.md) (rules consulted → `policy_refs`); [RFC 0042 — Permissions](0042-permission-acl.md) (`permission_id` references stored grants); [RFC 0048 — Execution receipts](0048-execution-receipts-audit-envelopes.md) (tool receipts SHOULD reference `authority_receipt` or standing grant). --- @@ -27,7 +27,7 @@ This RFC defines three JSON objects: | Zone | Who writes | Guarantees | |------|------------|--------------| | Model-adjacent | Model output supplies **intent**, **justification**, **requested_scope** preferences, TTL/audience **preferences**, and **task_context_ref** only. | Untrusted text and structure proposals. | -| Harness | Fills `request_id`, `requester`, `run_id`, `observed_at`, `provenance`; merges model fields after validation. | `requester` MUST match verified identity ([RFC 0026](0026-agent-identity-auth.md)). | +| Harness | Fills `request_id`, `requester`, `run_id`, `observed_at`, `provenance`; merges model fields after validation. | `requester` MUST match verified identity ([RFC 0026](0026-requester-identity-auth.md)). | | Policy | Emits **`delegation_decision`** exclusively. | Model MUST NOT emit or alter decisions. | | Auth broker | Emits **`authority_receipt`**; computes **integrity** over all other receipt fields. | Receipt is **tamper-evident**; executors verify hash (and signature when configured) before dispatch. | @@ -39,14 +39,14 @@ This RFC defines three JSON objects: **Model-provided (merged by harness):** `intent`, `justification`, `requested_scope`, `preferred_ttl_seconds`, `preferred_audience`, `task_context_ref`. 
-**Harness-provided:** `request_id`, `requester` (verified `agent_id`), `run_id`, `observed_at`, `provenance` (`trace_step_id`, `plan_version`). +**Harness-provided:** `request_id`, `requester` (verified `requester_id`), `run_id`, `observed_at`, `provenance` (`trace_step_id`, `plan_version`). **Required fields:** `request_id`, `requester`, `run_id`, `requested_scope`, `observed_at`. `requested_scope` is an object with: -- `resource` — logical resource identifier (e.g. `mailbox:user@example.com`). -- `action` — verb or capability token (e.g. `email.read`). +- `resource` — logical resource identifier (e.g. `mailbox:user@example.com`). +- `action` — verb or capability token (e.g. `email.read`). - `constraints` — optional object (column allowlists, row limits, folder IDs, etc.). ### 3.2 `delegation_decision` @@ -73,12 +73,12 @@ Produced by the **auth broker** after a favorable decision path. Binds `permissi ## 4. Lifecycle (FSM mapping) -The following aligns with the governed execution FSM in [RFC 0007](0007-agent-loop-protocol.md): +The following aligns with the governed execution FSM in [RFC 0007](0007-cognitive-pipeline-protocol.md): -1. **`plan`** — Model proposes actions and capability annotations; no tools. -2. **`request_authority`** — Harness materializes a **`delegation_request`** (model content validated and normalized; harness fields authoritative). -3. **`validate_authority`** — Policy engine evaluates the request and emits **`delegation_decision`**. -4. **`delegate_narrow`** — Auth broker issues **`authority_receipt`** with `granted_scope ≤` effective allowed scope (set-theoretic or lattice comparison per deployment). +1. **`plan`** — Model proposes actions and capability annotations; no tools. +2. **`request_authority`** — Harness materializes a **`delegation_request`** (model content validated and normalized; harness fields authoritative). +3. **`validate_authority`** — Policy engine evaluates the request and emits **`delegation_decision`**. +4. 
**`delegate_narrow`** — Auth broker issues **`authority_receipt`** with `granted_scope ≤` effective allowed scope (set-theoretic or lattice comparison per deployment). 5. **`execute_tool`** — Tool executor accepts dispatch only with valid receipt (or documented standing grant shortcut per RFC 0007 §10.1). [RFC 0048](0048-execution-receipts-audit-envelopes.md) SHOULD cite the `receipt_id`. Standing authorization (`plan` → `execute_tool` shortcut) bypasses this chain only where policy explicitly allows; the execution receipt still MUST cite how obligation was satisfied. @@ -231,7 +231,7 @@ The model asks to read full messages; policy narrows to **headers only**; broker { "schema_version": "0.2", "request_id": "dr_email_9f3a", - "requester": "agent:org/acme/exec-worker-07", + "requester": "cognitive-pipeline:org/acme/exec-worker-07", "run_id": "run_20260418_0412", "observed_at": "2026-04-18T04:12:01Z", "intent": "Summarize unread customer threads for Q2 report", @@ -307,10 +307,10 @@ The model asks to read full messages; policy narrows to **headers only**; broker ## 9. Acceptance criteria -1. For every tool side effect outside standing grants, the trace contains **`delegation_request`** → **`delegation_decision`** → **`authority_receipt`** in causal order with matching ids. -2. **`delegation_decision`** objects in audited stores MUST NOT be creatable or editable via model-facing APIs. -3. **`authority_receipt.integrity.content_hash`** MUST be verified before `execute_tool` dispatch; mismatch aborts execution and logs a security event. -4. `requester` MUST equal a registered [RFC 0026](0026-agent-identity-auth.md) `agent_id` vetted for the run. +1. For every tool side effect outside standing grants, the trace contains **`delegation_request`** → **`delegation_decision`** → **`authority_receipt`** in causal order with matching ids. +2. **`delegation_decision`** objects in audited stores MUST NOT be creatable or editable via model-facing APIs. +3. 
**`authority_receipt.integrity.content_hash`** MUST be verified before `execute_tool` dispatch; mismatch aborts execution and logs a security event. +4. `requester` MUST equal a registered [RFC 0026](0026-requester-identity-auth.md) `requester_id` vetted for the run. 5. [RFC 0048](0048-execution-receipts-audit-envelopes.md) tool execution records SHOULD include `receipt_id` (or standing-grant citation per RFC 0007); OAuth2 mappings in §5 are **optional**—native Open CoT objects are normative. --- diff --git a/rfcs/0048-execution-receipts-audit-envelopes.md b/rfcs/0048-execution-receipts-audit-envelopes.md index be9140e..9c4aeaa 100644 --- a/rfcs/0048-execution-receipts-audit-envelopes.md +++ b/rfcs/0048-execution-receipts-audit-envelopes.md @@ -8,7 +8,7 @@ ## 1. Summary -Open-CoT is a cognitive control plane. **Execution receipts** are tamper-evident records that prove what happened during governed agent execution, linking each tool call to the authorizing permission, policy path, and delegation context. **Audit envelopes** seal a full run: trace hash, artifact IDs, summaries, final budget (RFC 0038), and optional signatures. Receipts are per tool call; envelopes are emitted once per run in `audit_seal`. Neither object carries raw tool I/O—only **SHA-256** hashes of canonical serialized payloads—so artifacts can be shared for compliance without exposing secrets. Integrity blocks mirror RFC 0035. This RFC normatively defines `tool_execution_receipt` and `audit_envelope` for Schema v0.8. +Open-CoT is a cognitive control plane. **Execution receipts** are tamper-evident records that prove what happened during governed cognitive pipeline execution, linking each tool call to the authorizing permission, policy path, and delegation context. **Audit envelopes** seal a full run: trace hash, artifact IDs, summaries, final budget (RFC 0038), and optional signatures. Receipts are per tool call; envelopes are emitted once per run in `audit_seal`. 
Neither object carries raw tool I/O—only **SHA-256** hashes of canonical serialized payloads—so artifacts can be shared for compliance without exposing secrets. Integrity blocks mirror RFC 0035. This RFC normatively defines `tool_execution_receipt` and `audit_envelope` for Schema v0.8. ## 2. `tool_execution_receipt` @@ -16,7 +16,7 @@ Produced by the tool executor after every tool call. Fields: `execution_id` (uui ## 3. `audit_envelope` -Sealed summary of a governed run (RFC 0043 introduces auditing; this RFC specifies the envelope schema and lifecycle). Fields: `envelope_id` (uuid), `run_id`, `agent_id`, `task_hash`, `started_at`, `completed_at`, `completion_status` ∈ {`succeeded`,`failed`,`denied`,`budget_exhausted`,`external_stop`,`escalation_timeout`,`fail_safe`}, `trace_hash`, `delegation_requests` / `delegation_decisions` (string IDs), `authority_receipts` / `tool_execution_receipts` (ID arrays), `delegation_summary` (`total_requested`, `total_granted`, `total_denied`, `total_narrowed`, `total_escalated`), `permission_summary` (`total_granted`, `total_consumed`, `total_expired`, `total_revoked`), `budget_final` (RFC 0038 `BudgetSnapshot`), `policy_violations` (`violation_id`, `policy_id`, `rule_id`, `description`, `severity`, `observed_at`), `integrity` (`hash_algorithm`, `content_hash`, optional `signature`, `signing_key_id`). `content_hash` covers all fields **except** `integrity`. +Sealed summary of a governed run (RFC 0043 introduces auditing; this RFC specifies the envelope schema and lifecycle). 
Fields: `envelope_id` (uuid), `run_id`, `requester_id`, `task_hash`, `started_at`, `completed_at`, `completion_status` ∈ {`succeeded`,`failed`,`denied`,`budget_exhausted`,`external_stop`,`escalation_timeout`,`fail_safe`}, `trace_hash`, `delegation_requests` / `delegation_decisions` (string IDs), `authority_receipts` / `tool_execution_receipts` (ID arrays), `delegation_summary` (`total_requested`, `total_granted`, `total_denied`, `total_narrowed`, `total_escalated`), `permission_summary` (`total_granted`, `total_consumed`, `total_expired`, `total_revoked`), `budget_final` (RFC 0038 `BudgetSnapshot`), `policy_violations` (`violation_id`, `policy_id`, `rule_id`, `description`, `severity`, `observed_at`), `integrity` (`hash_algorithm`, `content_hash`, optional `signature`, `signing_key_id`). `content_hash` covers all fields **except** `integrity`. ## 4. JSON Schema — receipt and envelope bundle (normative) @@ -104,7 +104,7 @@ Sealed summary of a governed run (RFC 0043 introduces auditing; this RFC specifi "schema_version": { "type": "string", "enum": ["0.2"] }, "envelope_id": { "type": "string", "format": "uuid" }, "run_id": { "type": "string", "minLength": 1 }, - "agent_id": { "type": "string", "minLength": 1 }, + "requester_id": { "type": "string", "minLength": 1 }, "task_hash": { "type": "string", "pattern": "^[a-f0-9]{64}$" }, "started_at": { "type": "string", "format": "date-time" }, "completed_at": { "type": "string", "format": "date-time" }, @@ -135,7 +135,7 @@ Sealed summary of a governed run (RFC 0043 introduces auditing; this RFC specifi "policy_violations": { "type": "array", "items": { "$ref": "#/$defs/policy_violation_entry" } }, "integrity": { "$ref": "#/$defs/integrity" } }, - "required": ["schema_version", "envelope_id", "run_id", "agent_id", "task_hash", "started_at", "completed_at", "completion_status", "trace_hash", "delegation_requests", "delegation_decisions", "authority_receipts", "tool_execution_receipts", "delegation_summary", 
"permission_summary", "budget_final", "policy_violations", "integrity"] + "required": ["schema_version", "envelope_id", "run_id", "requester_id", "task_hash", "started_at", "completed_at", "completion_status", "trace_hash", "delegation_requests", "delegation_decisions", "authority_receipts", "tool_execution_receipts", "delegation_summary", "permission_summary", "budget_final", "policy_violations", "integrity"] } } } @@ -230,7 +230,7 @@ Synthetic 64-char lowercase hex stands in for real SHA-256; conforming `content_ "schema_version": "0.2", "envelope_id": "11111111-2222-4333-8444-555555555555", "run_id": "run_20260418_03", - "agent_id": "planner-alpha", + "requester_id": "planner-alpha", "task_hash": "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee", "started_at": "2026-04-18T11:59:00.000Z", "completed_at": "2026-04-18T12:10:00.000Z", diff --git a/rfcs/0049-capability-manifest.md b/rfcs/0049-capability-manifest.md index 37fa129..c4771ab 100644 --- a/rfcs/0049-capability-manifest.md +++ b/rfcs/0049-capability-manifest.md @@ -8,7 +8,7 @@ ## 1. Summary -Open-CoT is a cognitive control plane for governed agent execution. Without a **capability manifest**, models propose actions with incomplete knowledge of what the harness will permit, burning context on delegation cycles that resolve to denial. This RFC defines the **capability manifest**: a harness-compiled, model-facing snapshot that summarizes callable tools, policy and sandbox posture, remaining budget, and agent trust. The manifest is injected at key finite-state machine (FSM) transitions so the model can plan within real constraints. Schema v0.8 adds a normative JSON representation for validation and audit, and a normative **compact text** serialization for token-efficient injection. +Open-CoT is a cognitive control plane for governed cognitive pipeline execution. 
Without a **capability manifest**, models propose actions with incomplete knowledge of what the harness will permit, burning context on delegation cycles that resolve to denial. This RFC defines the **capability manifest**: a harness-compiled, model-facing snapshot that summarizes callable tools, policy and sandbox posture, remaining budget, and cognitive pipeline trust. The manifest is injected at key finite-state machine (FSM) transitions so the model can plan within real constraints. Schema v0.8 adds a normative JSON representation for validation and audit, and a normative **compact text** serialization for token-efficient injection. ## 2. Motivation and problem statement @@ -20,13 +20,13 @@ The following requirements apply to conforming harnesses for Schema v0.8. **N1 — Compilation.** The harness MUST compile the manifest; the model MUST NOT construct or mutate the manifest as authoritative state. -**N2 — Provenance.** Each manifest instance MUST be attributable to a `run_id`, `agent_id`, and compilation `timestamp`, and MUST record the FSM `phase` at which it was produced. +**N2 — Provenance.** Each manifest instance MUST be attributable to a `run_id`, `requester_id`, and compilation `timestamp`, and MUST record the FSM `phase` at which it was produced. **N3 — Heartbeat injection.** The harness MUST re-compile and inject a fresh manifest before **every LLM call** during a governed run. Models lose sight of earlier context as the conversation grows (context decay); a stale manifest from three LLM calls ago is effectively invisible. Re-injecting at every model-facing turn keeps budget numbers, tool availability, and constraints current regardless of how far the run has progressed. This pattern is called the **manifest heartbeat**. At minimum, the manifest MUST be injected at FSM states **`frame`**, **`plan`**, **`critique_verify`**, and **`finalize`** — every state where the model makes decisions. 
Conforming harnesses SHOULD inject at every LLM call without exception; the cost is under 200 tokens per injection and is repaid many times over by preventing hallucinated tool calls and wasted delegation cycles. -**N4 — Audit.** The structured JSON form MUST be retained on **AgentState** for the run and MUST be referenceable from the audit envelope (RFC 0048) as part of the governed trace. +**N4 — Audit.** The structured JSON form MUST be retained on **PipelineState** for the run and MUST be referenceable from the audit envelope (RFC 0048) as part of the governed trace. **N5 — Blocked tools.** Blocked tool names MUST appear in `tools.blocked` (structured) and in the compact `tools_restricted` line with reason `blocked` where applicable, so the model can avoid requesting them. Descriptions for blocked tools are intentionally omitted in compact form (name only). @@ -40,11 +40,11 @@ The manifest aggregates, at minimum: | Sandbox configuration | 0017 | Allow/deny lists, environment limits affecting tool viability | | Policy rules | 0041 | `access_level` per tool, narrowing constraints, approval requirements | | Budget tracker | 0038 | `steps_remaining`, `tool_calls_remaining`, `tokens_remaining`, `retries_remaining` | -| Agent identity | 0026 | `trust_level` | +| Cognitive pipeline identity | 0026 | `trust_level` | **Access levels** align with permission semantics (RFC 0042): `pre_authorized`, `requires_delegation`, and `blocked`. Tools that are blocked by sandbox or policy appear in `tools.blocked` and MUST NOT appear in `tools.available` with `access_level: "blocked"`; blocked status is expressed only via the blocked list and compact serialization. -**Trust level** is one of `untrusted`, `low`, `medium`, `high`, derived from agent identity and deployment policy. It informs expected delegation friction, not cryptographic proof. +**Trust level** is one of `untrusted`, `low`, `medium`, `high`, derived from cognitive pipeline identity and deployment policy. 
It informs expected delegation friction, not cryptographic proof. ## 5. Injection points — the manifest heartbeat @@ -63,13 +63,13 @@ The heartbeat is synchronized with the governed FSM (RFC 0007): | **`frame`** | Initial briefing — full manifest with all available tools, blocked tools, budget, trust level, constraints. | | **`plan`** | Planning briefing — model sees what it can request before committing to a plan. Prevents hallucinated tool references. | | **`critique_verify`** | Post-execution refresh — updated budget after tool calls; revoked permissions reflected. | -| **`finalize`** | Final-answer briefing — model knows remaining budget and can decide whether to attempt more work or synthesize. | +| **`finalize`** | Final-answer briefing — model knows remaining budget and can decide whether to attempt more work or synthesize. | Conforming harnesses SHOULD inject at every LLM call, not only the four states listed above. Any additional LLM call (for example, a re-plan after critique) benefits from the same heartbeat. The per-injection cost is negligible compared to the tokens saved by preventing the model from proposing actions against stale or forgotten context. ## 6. Representations -**Structured JSON** — Canonical for storage, schema validation, audit linkage, and machine processing. This is the object persisted on AgentState and cited by audit envelopes. +**Structured JSON** — Canonical for storage, schema validation, audit linkage, and machine processing. This is the object persisted on PipelineState and cited by audit envelopes. **Compact text** — Canonical for model-visible context. It uses delimiter lines `[capability_manifest]` … `[/capability_manifest]` so parsers and harness scrubbers can locate and optionally strip the briefing when superseded. 
@@ -85,11 +85,11 @@ Implementations MAY attach optional `tool_contract_ref` (URI or registry id) per "title": "Open CoT RFC 0049 — Capability Manifest", "type": "object", "additionalProperties": false, - "required": ["manifest_id", "run_id", "agent_id", "timestamp", "phase", "tools", "budget", "trust_level"], + "required": ["manifest_id", "run_id", "requester_id", "timestamp", "phase", "tools", "budget", "trust_level"], "properties": { "manifest_id": { "type": "string", "minLength": 1 }, "run_id": { "type": "string", "minLength": 1 }, - "agent_id": { "type": "string", "minLength": 1 }, + "requester_id": { "type": "string", "minLength": 1 }, "timestamp": { "type": "string", "format": "date-time" }, "phase": { "type": "string", "minLength": 1, "description": "FSM phase at which this manifest was compiled (e.g., frame, critique_verify, plan)." }, "tools": { @@ -170,7 +170,7 @@ Typical deployments SHOULD target **under 200 tokens** for a five-tool setup in ## 10. Lifecycle 1. During **`receive`**, the harness gathers registry, sandbox, policy, budget, and identity inputs. -2. Before **every LLM call** (the heartbeat), the harness recompiles the manifest from current state, assigns a fresh `manifest_id`, sets `phase` to the current FSM state, and persists it on AgentState (latest manifest replaces previous; implementations MAY retain history for audit). +2. Before **every LLM call** (the heartbeat), the harness recompiles the manifest from current state, assigns a fresh `manifest_id`, sets `phase` to the current FSM state, and persists it on PipelineState (latest manifest replaces previous; implementations MAY retain history for audit). 3. The harness injects the **compact text** form into the system message preamble (or equivalent model-facing channel). 4. After tool execution, budget and permission changes are reflected in the next heartbeat automatically — no explicit "refresh" step is needed because every heartbeat reads current state. 5. 
The **audit envelope** references the final manifest id or embeds hashes of canonical JSON as required by RFC 0048. @@ -181,8 +181,8 @@ Typical deployments SHOULD target **under 200 tokens** for a five-tool setup in - RFC 0007 — Governed FSM (injection points `frame`, `critique_verify`). - RFC 0016 — Tool Capability Negotiation (manifest as runtime realization of negotiated capabilities). - RFC 0017 — Safety & Sandboxing (sandbox feeds allow/deny into manifest). -- RFC 0021 — Agent Capability Declaration (declared vs manifest-granted capabilities). -- RFC 0026 — Agent Identity (`trust_level`). +- RFC 0021 — Cognitive Pipeline Capability Declaration (declared vs manifest-granted capabilities). +- RFC 0026 — Cognitive Pipeline Identity (`trust_level`). - RFC 0038 — Cost-Aware Budget (budget snapshot fields). - RFC 0041 — Policy Enforcement (access levels and constraints). - RFC 0042 — Permissions (`pre_authorized` vs `requires_delegation`). @@ -202,7 +202,7 @@ Typical deployments SHOULD target **under 200 tokens** for a five-tool setup in { "manifest_id": "cm_01jqzexample0001", "run_id": "run_8f3c2a", - "agent_id": "agent_researcher_eu", + "requester_id": "agent_researcher_eu", "timestamp": "2026-04-18T14:22:05Z", "phase": "frame", "tools": { @@ -260,11 +260,11 @@ constraints: max 5 results per search, no raw HTML in search excerpts Conformance for a harness implementation is indicated by all of the following: -- The harness compiles the manifest from tool registry, sandbox configuration, active policy rules, and budget tracker state, joined with agent identity for `trust_level`. +- The harness compiles the manifest from tool registry, sandbox configuration, active policy rules, and budget tracker state, joined with cognitive pipeline identity for `trust_level`. - The manifest heartbeat fires before **every LLM call** (at minimum: `frame`, `plan`, `critique_verify`, `finalize`) using the compact text format.
- For representative five-tool profiles, compact serialization stays **under 200 tokens** (excluding outer system prompt boilerplate). - Structured JSON validates against the schema in §7. -- Each run retains manifest history or the latest manifest on **AgentState** suitable for audit. +- Each run retains manifest history or the latest manifest on **PipelineState** suitable for audit. - Automated tests cover manifest **compilation** from synthetic registry/policy inputs and **round-trip consistency** between JSON and compact text for a fixed fixture set. ## 15. Security considerations diff --git a/rfcs/0050-toon-adapter.md b/rfcs/0050-toon-adapter.md index 213d0ee..7a34d07 100644 --- a/rfcs/0050-toon-adapter.md +++ b/rfcs/0050-toon-adapter.md @@ -30,7 +30,7 @@ The adapter generalizes what `manifestToCompactText` does today into a reusable, - A bidirectional adapter: `toToon(object, schema?)` and `fromToon(toonString, schema?)`. - Schema-to-header generation: `schemaToToonHeader(jsonSchema)`. - A TOON serializer for capability manifests (`manifestToToon`) alongside the existing compact text. -- A `wire_format` configuration option on agent configs (`"json" | "compact-text" | "toon"`). +- A `wire_format` configuration option on cognitive pipeline configs (`"json" | "compact-text" | "toon"`). - Documentation, experiment card, and example fixtures. **Non-goals:** @@ -38,7 +38,7 @@ The adapter generalizes what `manifestToCompactText` does today into a reusable, - TOON is **never normative**. It is never stored in audit envelopes, trace archives, or harness-to-harness interchange. - TOON does not replace JSON Schema validation. All TOON output is validated by converting back to JSON and running Ajv. - TOON does not define a new schema language. The inline header is a serialization hint, not a type system. -- This RFC does not mandate TOON adoption. It is opt-in per agent or backend configuration. +- This RFC does not mandate TOON adoption. 
It is opt-in per cognitive pipeline or backend configuration. ## 4. Normative requirements @@ -140,7 +140,7 @@ constraints: no network after step 5; read-only filesystem [/toon:capability_manifest] ``` -This replaces `manifestToCompactText` when `wire_format` is `"toon"`. The structured JSON manifest on `AgentState` is unchanged. +This replaces `manifestToCompactText` when `wire_format` is `"toon"`. The structured JSON manifest on `PipelineState` is unchanged. ## 9. Configuration @@ -150,7 +150,7 @@ interface WireFormatConfig { } ``` -Added as an optional field on `GovernedAgentConfig` and as a parameter on `runChatAgent`. Default: `"compact-text"`. +Added as an optional field on `GovernedPipelineConfig` and as a parameter on `runChatPipeline`. Default: `"compact-text"`. The manifest heartbeat and any future schema injections select the serializer based on this setting: @@ -188,6 +188,6 @@ The following published work supports the token-efficiency claims motivating thi - `toToon` and `fromToon` round-trip for all schemas in the registry without validation errors. - `manifestToToon` output is under 200 tokens for a five-tool profile (matching RFC 0049 target). -- Governed agent demo completes successfully with `wire_format: "toon"`. +- Governed cognitive pipeline demo completes successfully with `wire_format: "toon"`. - Token count comparison (JSON vs compact-text vs TOON) is documented for capability manifest and reasoning trace fixtures. - No change in behavior for existing users who do not set `wire_format`. diff --git a/rfcs/0052-cognitive-artifact-and-capability-snapshot.md b/rfcs/0052-cognitive-artifact-and-capability-snapshot.md index 24d6e4d..033d1ba 100644 --- a/rfcs/0052-cognitive-artifact-and-capability-snapshot.md +++ b/rfcs/0052-cognitive-artifact-and-capability-snapshot.md @@ -25,6 +25,9 @@ any side effect. - `cognitive_artifact`: typed proposal emitted from the cognitive step. 
- `execution_intent`: requested endpoint execution tied to a snapshot and capability digest. +- `reasoning_trace`: cognitive evidence explaining the path from objective to + proposal. It can carry detailed evidence, an audit-safe summary, or a + redacted evidence record. - `observation`: structured evidence recorded during reconciliation. ## 3. Normative requirements @@ -35,8 +38,14 @@ any side effect. before execution. - A runtime MUST validate arguments against the original capability input schema. -- Reasoning traces are explanatory audit material only. They are not proof, +- Reasoning traces are evidentiary audit material. They help reviewers + understand how the cognitive step reached a proposal, but they are not proof, authorization, or trusted state. +- A reasoning trace SHOULD declare whether it contains detailed evidence, + audit-safe summary material, or redacted evidence. Redaction metadata SHOULD + explain why detail is unavailable. +- A runtime MUST NOT infer permission from reasoning content. Permission comes + only from policy gates, validated capability snapshots, and reconciliation. ## 4. 
Runtime neutrality diff --git a/schemas/registry.json b/schemas/registry.json index 6a0bcbd..ce19112 100644 --- a/schemas/registry.json +++ b/schemas/registry.json @@ -8,40 +8,40 @@ "branching": "schemas/rfc-0004-branching.json", "reward": "schemas/rfc-0005-reward.json", "ensemble": "schemas/rfc-0006-ensemble.json", - "agent_loop": "schemas/rfc-0007-agent-loop.json", + "cognitive_pipeline": "schemas/rfc-0007-cognitive-pipeline.json", "dataset_packaging": "schemas/rfc-0008-dataset.json", "reward_fusion": "schemas/rfc-0009-reward-fusion.json", - "agent_memory": "schemas/rfc-0010-memory.json", - "multi_agent_protocol": "schemas/rfc-0011-multi-agent-protocol.json", + "cognitive_context": "schemas/rfc-0010-cognitive-context.json", + "multi_party_cognition_protocol": "schemas/rfc-0011-multi-party-cognition-protocol.json", "dataset_streaming": "schemas/rfc-0012-dataset-streaming.json", "memory_compression": "schemas/rfc-0013-memory-compression.json", "memory_conflict_resolution": "schemas/rfc-0014-memory-conflict-resolution.json", - "multi_agent_reward_sharing": "schemas/rfc-0015-multi-agent-reward-sharing.json", + "multi_party_reward_sharing": "schemas/rfc-0015-multi-party-reward-sharing.json", "tool_capability_negotiation": "schemas/rfc-0016-tool-capability-negotiation.json", - "agent_safety_sandboxing": "schemas/rfc-0017-agent-safety-sandboxing.json", + "runtime_safety_sandboxing": "schemas/rfc-0017-runtime-safety-sandboxing.json", "tool_error_taxonomy": "schemas/rfc-0018-tool-error-taxonomy.json", - "multi_agent_planning_graphs": "schemas/rfc-0019-multi-agent-planning-graphs.json", + "collaborative_planning_graphs": "schemas/rfc-0019-collaborative-planning-graphs.json", "verifiable_scratchpad_compression": "schemas/rfc-0020-verifiable-scratchpad-compression.json", - "agent_capability_declaration": "schemas/rfc-0021-agent-capability-declaration.json", - "agent_evaluation_protocol": "schemas/rfc-0022-agent-evaluation-protocol.json", + "capability_declaration": 
"schemas/rfc-0021-capability-declaration.json", + "cognitive_evaluation_protocol": "schemas/rfc-0022-cognitive-evaluation-protocol.json", "human_in_the_loop": "schemas/rfc-0023-human-in-the-loop.json", "multi_modal_reasoning": "schemas/rfc-0024-multi-modal-reasoning.json", "tool_marketplace_registry": "schemas/rfc-0025-tool-marketplace-registry.json", - "agent_identity_auth": "schemas/rfc-0026-agent-identity-auth.json", - "distributed_agent_execution": "schemas/rfc-0027-distributed-agent-execution.json", - "agent_environment": "schemas/rfc-0028-agent-environment.json", - "agent_benchmark_dataset": "schemas/rfc-0029-agent-benchmark-dataset.json", - "agent_lifecycle_versioning": "schemas/rfc-0030-agent-lifecycle-versioning.json", - "agent_observability_telemetry": "schemas/rfc-0031-agent-observability-telemetry.json", - "agent_deployment_manifest": "schemas/rfc-0032-agent-deployment-manifest.json", - "agent_security_threat_model": "schemas/rfc-0033-agent-security-threat-model.json", - "agent_federation_protocol": "schemas/rfc-0034-agent-federation-protocol.json", + "requester_identity_auth": "schemas/rfc-0026-requester-identity-auth.json", + "distributed_execution": "schemas/rfc-0027-distributed-execution.json", + "capability_environment": "schemas/rfc-0028-capability-environment.json", + "cognitive_benchmark_dataset": "schemas/rfc-0029-cognitive-benchmark-dataset.json", + "runtime_lifecycle_versioning": "schemas/rfc-0030-runtime-lifecycle-versioning.json", + "cognitive_observability_telemetry": "schemas/rfc-0031-cognitive-observability-telemetry.json", + "runtime_deployment_manifest": "schemas/rfc-0032-runtime-deployment-manifest.json", + "runtime_security_threat_model": "schemas/rfc-0033-runtime-security-threat-model.json", + "cognitive_federation_protocol": "schemas/rfc-0034-cognitive-federation-protocol.json", "data_provenance_tracking": "schemas/rfc-0035-data-provenance-tracking.json", - "agent_native_compression_delta": 
"schemas/rfc-0036-agent-native-compression-delta.json", + "cognitive_native_compression_delta": "schemas/rfc-0036-cognitive-native-compression-delta.json", "token_economy_cost_modeling": "schemas/rfc-0037-token-economy-cost-modeling.json", "cost_aware_reasoning_budget": "schemas/rfc-0038-cost-aware-reasoning-budget.json", "tool_cost_modeling": "schemas/rfc-0039-tool-cost-modeling.json", - "multi_agent_economic_incentives": "schemas/rfc-0040-multi-agent-economic-incentives.json", + "multi_party_economic_incentives": "schemas/rfc-0040-multi-party-economic-incentives.json", "policy_enforcement": "schemas/rfc-0041-policy-enforcement.json", "permission_acl": "schemas/rfc-0042-permission-acl.json", "auditing_compliance_logs": "schemas/rfc-0043-auditing-compliance-logs.json", diff --git a/schemas/rfc-0007-agent-loop.json b/schemas/rfc-0007-cognitive-pipeline.json similarity index 58% rename from schemas/rfc-0007-agent-loop.json rename to schemas/rfc-0007-cognitive-pipeline.json index 6382e2e..43f9b1b 100644 --- a/schemas/rfc-0007-agent-loop.json +++ b/schemas/rfc-0007-cognitive-pipeline.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Open CoT RFC 0007 — Agent loop protocol trace", - "description": "Reasoning trace emitted by an agent loop (RFC 0007). Extends RFC 0001 with optional tool_invocation on action steps (RFC 0003).", + "title": "Open CoT RFC 0007 — Cognitive Pipeline Protocol Trace", + "description": "Reasoning trace emitted by a cognitive pipeline (RFC 0007). 
Extends RFC 0001 with optional tool_invocation on action steps (RFC 0003).", "allOf": [ { "$ref": "rfc-0001-reasoning.json" @@ -24,10 +24,10 @@ } } ], - "$id": "https://opencot.dev/schemas/rfc-0007-agent-loop.json", + "$id": "https://opencot.dev/schemas/rfc-0007-cognitive-pipeline.json", "x-opencot": { "rfc": "0007", - "shortname": "agent_loop", - "source_rfc": "rfcs/0007-agent-loop-protocol.md" + "shortname": "cognitive_pipeline", + "source_rfc": "rfcs/0007-cognitive-pipeline-protocol.md" } } diff --git a/schemas/rfc-0010-memory.json b/schemas/rfc-0010-cognitive-context.json similarity index 88% rename from schemas/rfc-0010-memory.json rename to schemas/rfc-0010-cognitive-context.json index ab9f026..d52975f 100644 --- a/schemas/rfc-0010-memory.json +++ b/schemas/rfc-0010-cognitive-context.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "OpenCoT Agent Memory Schema v0.1", + "title": "OpenCoT Cognitive Pipeline Memory Schema v0.1", "type": "object", "properties": { "version": { @@ -10,9 +10,9 @@ ], "description": "Schema version." }, - "agent_id": { + "requester_id": { "type": "string", - "description": "Unique identifier for the agent." + "description": "Unique identifier for the cognitive pipeline."
}, "short_term_memory": { "type": "array", @@ -114,12 +114,12 @@ }, "required": [ "version", - "agent_id" + "requester_id" ], - "$id": "https://opencot.dev/schemas/rfc-0010-memory.json", + "$id": "https://opencot.dev/schemas/rfc-0010-cognitive-context.json", "x-opencot": { "rfc": "0010", - "shortname": "agent_memory", - "source_rfc": "rfcs/0010-agent-memory-schema.md" + "shortname": "cognitive_context", + "source_rfc": "rfcs/0010-cognitive-context-schema.md" } } diff --git a/schemas/rfc-0011-multi-agent-protocol.json b/schemas/rfc-0011-multi-party-cognition-protocol.json similarity index 81% rename from schemas/rfc-0011-multi-agent-protocol.json rename to schemas/rfc-0011-multi-party-cognition-protocol.json index 6e09871..5491e9a 100644 --- a/schemas/rfc-0011-multi-agent-protocol.json +++ b/schemas/rfc-0011-multi-party-cognition-protocol.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "OpenCoT Multi-Agent Protocol v0.2", + "title": "OpenCoT Multi-Party Protocol v0.2", "type": "object", "properties": { "version": { @@ -9,12 +9,12 @@ "0.2" ] }, - "agents": { + "pipelines": { "type": "array", "items": { "type": "object", "properties": { - "agent_id": { + "requester_id": { "type": "string" }, "role": { @@ -28,7 +28,7 @@ } }, "required": [ - "agent_id", + "requester_id", "role" ] } @@ -76,13 +76,13 @@ }, "required": [ "version", - "agents", + "pipelines", "messages" ], - "$id": "https://opencot.dev/schemas/rfc-0011-multi-agent-protocol.json", + "$id": "https://opencot.dev/schemas/rfc-0011-multi-party-cognition-protocol.json", "x-opencot": { "rfc": "0011", - "shortname": "multi_agent_protocol", - "source_rfc": "rfcs/0011-multi-agent-protocol.md" + "shortname": "multi_party_cognition_protocol", + "source_rfc": "rfcs/0011-multi-party-cognition-protocol.md" } } diff --git a/schemas/rfc-0015-multi-agent-reward-sharing.json b/schemas/rfc-0015-multi-party-reward-sharing.json similarity index 60% rename from 
schemas/rfc-0015-multi-agent-reward-sharing.json rename to schemas/rfc-0015-multi-party-reward-sharing.json index 7475992..13fca5c 100644 --- a/schemas/rfc-0015-multi-agent-reward-sharing.json +++ b/schemas/rfc-0015-multi-party-reward-sharing.json @@ -1,14 +1,14 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "RFC 0015 — Multi Agent Reward Sharing (stub)", + "title": "RFC 0015 — Multi Party Reward Sharing (stub)", "description": "Placeholder JSON Schema for RFC 0015. The RFC does not yet contain an extractable JSON Schema block; evolve this file as the RFC stabilizes.", "type": "object", "additionalProperties": true, "x-opencot": { "rfc": "0015", - "shortname": "multi_agent_reward_sharing", + "shortname": "multi_party_reward_sharing", "status": "stub", - "source_rfc": "rfcs/0015-multi-agent-reward-sharing.md" + "source_rfc": "rfcs/0015-multi-party-reward-sharing.md" }, - "$id": "https://opencot.dev/schemas/rfc-0015-multi-agent-reward-sharing.json" + "$id": "https://opencot.dev/schemas/rfc-0015-multi-party-reward-sharing.json" } diff --git a/schemas/rfc-0017-agent-safety-sandboxing.json b/schemas/rfc-0017-runtime-safety-sandboxing.json similarity index 63% rename from schemas/rfc-0017-agent-safety-sandboxing.json rename to schemas/rfc-0017-runtime-safety-sandboxing.json index 9c21bd5..0a48411 100644 --- a/schemas/rfc-0017-agent-safety-sandboxing.json +++ b/schemas/rfc-0017-runtime-safety-sandboxing.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Open CoT RFC 0017 — Agent Safety & Sandboxing Configuration", - "description": "Defines sandbox policies that constrain agent behavior at runtime: which tools are permitted, step/branch limits, and memory access controls.", + "title": "Open CoT RFC 0017 — Cognitive pipeline Safety & Sandboxing Configuration", + "description": "Defines sandbox policies that constrain cognitive pipeline behavior at runtime: which tools are permitted, step/branch limits, and memory 
access controls.", "type": "object", "properties": { "allowed_tools": { @@ -9,7 +9,7 @@ "items": { "type": "string" }, - "description": "Tool names the agent may invoke. Use [\"*\"] to allow all." + "description": "Tool names the cognitive pipeline may invoke. Use [\"*\"] to allow all." }, "blocked_tools": { "type": "array", @@ -30,7 +30,7 @@ }, "memory_acl": { "type": "object", - "description": "Access control list mapping role or agent IDs to permission arrays.", + "description": "Access control list mapping role or requester IDs to permission arrays.", "additionalProperties": { "type": "array", "items": { @@ -51,10 +51,10 @@ "max_steps" ], "additionalProperties": true, - "$id": "https://opencot.dev/schemas/rfc-0017-agent-safety-sandboxing.json", + "$id": "https://opencot.dev/schemas/rfc-0017-runtime-safety-sandboxing.json", "x-opencot": { "rfc": "0017", - "shortname": "agent_safety_sandboxing", - "source_rfc": "rfcs/0017-agent-safety-sandboxing.md" + "shortname": "runtime_safety_sandboxing", + "source_rfc": "rfcs/0017-runtime-safety-sandboxing.md" } } diff --git a/schemas/rfc-0019-multi-agent-planning-graphs.json b/schemas/rfc-0019-collaborative-planning-graphs.json similarity index 56% rename from schemas/rfc-0019-multi-agent-planning-graphs.json rename to schemas/rfc-0019-collaborative-planning-graphs.json index 11dfc67..a9eef2b 100644 --- a/schemas/rfc-0019-multi-agent-planning-graphs.json +++ b/schemas/rfc-0019-collaborative-planning-graphs.json @@ -1,14 +1,14 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "RFC 0019 — Multi Agent Planning Graphs (stub)", + "title": "RFC 0019 — Collaborative Planning Graphs (stub)", "description": "Placeholder JSON Schema for RFC 0019. 
The RFC does not yet contain an extractable JSON Schema block; evolve this file as the RFC stabilizes.", "type": "object", "additionalProperties": true, "x-opencot": { "rfc": "0019", - "shortname": "multi_agent_planning_graphs", + "shortname": "collaborative_planning_graphs", "status": "stub", - "source_rfc": "rfcs/0019-multi-agent-planning-graphs.md" + "source_rfc": "rfcs/0019-collaborative-planning-graphs.md" }, - "$id": "https://opencot.dev/schemas/rfc-0019-multi-agent-planning-graphs.json" + "$id": "https://opencot.dev/schemas/rfc-0019-collaborative-planning-graphs.json" } diff --git a/schemas/rfc-0021-agent-capability-declaration.json b/schemas/rfc-0021-capability-declaration.json similarity index 56% rename from schemas/rfc-0021-agent-capability-declaration.json rename to schemas/rfc-0021-capability-declaration.json index 55dad67..6d6f62f 100644 --- a/schemas/rfc-0021-agent-capability-declaration.json +++ b/schemas/rfc-0021-capability-declaration.json @@ -1,14 +1,14 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "RFC 0021 — Agent Capability Declaration (stub)", + "title": "RFC 0021 — Capability Declaration (stub)", "description": "Placeholder JSON Schema for RFC 0021. 
The RFC does not yet contain an extractable JSON Schema block; evolve this file as the RFC stabilizes.", "type": "object", "additionalProperties": true, "x-opencot": { "rfc": "0021", - "shortname": "agent_capability_declaration", + "shortname": "capability_declaration", "status": "stub", - "source_rfc": "rfcs/0021-agent-capability-declaration.md" + "source_rfc": "rfcs/0021-capability-declaration.md" }, - "$id": "https://opencot.dev/schemas/rfc-0021-agent-capability-declaration.json" + "$id": "https://opencot.dev/schemas/rfc-0021-capability-declaration.json" } diff --git a/schemas/rfc-0022-agent-evaluation-protocol.json b/schemas/rfc-0022-cognitive-evaluation-protocol.json similarity index 89% rename from schemas/rfc-0022-agent-evaluation-protocol.json rename to schemas/rfc-0022-cognitive-evaluation-protocol.json index 1b13287..2ab4fad 100644 --- a/schemas/rfc-0022-agent-evaluation-protocol.json +++ b/schemas/rfc-0022-cognitive-evaluation-protocol.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Open CoT RFC 0022 — Agent Evaluation Protocol", + "title": "Open CoT RFC 0022 — Cognitive pipeline Evaluation Protocol", "type": "object", "properties": { "version": { @@ -114,10 +114,10 @@ "metrics", "statistics" ], - "$id": "https://opencot.dev/schemas/rfc-0022-agent-evaluation-protocol.json", + "$id": "https://opencot.dev/schemas/rfc-0022-cognitive-evaluation-protocol.json", "x-opencot": { "rfc": "0022", - "shortname": "agent_evaluation_protocol", - "source_rfc": "rfcs/0022-agent-evaluation-protocol.md" + "shortname": "cognitive_evaluation_protocol", + "source_rfc": "rfcs/0022-cognitive-evaluation-protocol.md" } } diff --git a/schemas/rfc-0023-human-in-the-loop.json b/schemas/rfc-0023-human-in-the-loop.json index 08c5194..8921177 100644 --- a/schemas/rfc-0023-human-in-the-loop.json +++ b/schemas/rfc-0023-human-in-the-loop.json @@ -79,7 +79,7 @@ "additionalProperties": false, "required": [ "run_id", - "agent_id", + "requester_id", 
"step_ref" ], "properties": { @@ -87,7 +87,7 @@ "type": "string", "minLength": 1 }, - "agent_id": { + "requester_id": { "type": "string", "minLength": 1 }, @@ -101,10 +101,10 @@ "type": "object", "additionalProperties": false, "required": [ - "agent" + "pipeline" ], "properties": { - "agent": { + "pipeline": { "type": "string", "minLength": 1 }, diff --git a/schemas/rfc-0026-agent-identity-auth.json b/schemas/rfc-0026-requester-identity-auth.json similarity index 84% rename from schemas/rfc-0026-agent-identity-auth.json rename to schemas/rfc-0026-requester-identity-auth.json index ac7b3f7..2281147 100644 --- a/schemas/rfc-0026-agent-identity-auth.json +++ b/schemas/rfc-0026-requester-identity-auth.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://opencot.dev/schema/rfc0026/agent-identity.json", - "title": "Open CoT RFC 0026 — Agent Identity", + "$id": "https://opencot.dev/schema/rfc0026/requester-identity.json", + "title": "Open CoT RFC 0026 — Cognitive pipeline Identity", "type": "object", "additionalProperties": false, "properties": { @@ -11,7 +11,7 @@ "0.2" ] }, - "agent_id": { + "requester_id": { "type": "string", "minLength": 1, "pattern": "^[A-Za-z0-9._:@/-]+$" @@ -53,7 +53,7 @@ "key_algorithm": { "type": "string" }, - "parent_agent_id": { + "parent_requester_id": { "type": "string", "minLength": 1 }, @@ -67,7 +67,7 @@ }, "required": [ "schema_version", - "agent_id", + "requester_id", "display_name", "role", "trust_level", @@ -96,7 +96,7 @@ ], "x-opencot": { "rfc": "0026", - "shortname": "agent_identity_auth", - "source_rfc": "rfcs/0026-agent-identity-auth.md" + "shortname": "requester_identity_auth", + "source_rfc": "rfcs/0026-requester-identity-auth.md" } } diff --git a/schemas/rfc-0027-distributed-agent-execution.json b/schemas/rfc-0027-distributed-execution.json similarity index 96% rename from schemas/rfc-0027-distributed-agent-execution.json rename to schemas/rfc-0027-distributed-execution.json index 
58ef18b..069b823 100644 --- a/schemas/rfc-0027-distributed-agent-execution.json +++ b/schemas/rfc-0027-distributed-execution.json @@ -8,7 +8,7 @@ "additionalProperties": false, "required": [ "node_id", - "agent_id", + "requester_id", "endpoint", "capabilities", "trust_level", @@ -19,7 +19,7 @@ "type": "string", "minLength": 1 }, - "agent_id": { + "requester_id": { "type": "string", "minLength": 1 }, @@ -180,7 +180,7 @@ ], "x-opencot": { "rfc": "0027", - "shortname": "distributed_agent_execution", - "source_rfc": "rfcs/0027-distributed-agent-execution-protocol.md" + "shortname": "distributed_execution", + "source_rfc": "rfcs/0027-distributed-execution-protocol.md" } } diff --git a/schemas/rfc-0028-agent-environment.json b/schemas/rfc-0028-capability-environment.json similarity index 58% rename from schemas/rfc-0028-agent-environment.json rename to schemas/rfc-0028-capability-environment.json index 1d27ee7..df13e98 100644 --- a/schemas/rfc-0028-agent-environment.json +++ b/schemas/rfc-0028-capability-environment.json @@ -1,14 +1,14 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "RFC 0028 — Agent Environment (stub)", + "title": "RFC 0028 — Capability Environment (stub)", "description": "Placeholder JSON Schema for RFC 0028. 
The RFC does not yet contain an extractable JSON Schema block; evolve this file as the RFC stabilizes.", "type": "object", "additionalProperties": true, "x-opencot": { "rfc": "0028", - "shortname": "agent_environment", + "shortname": "capability_environment", "status": "stub", - "source_rfc": "rfcs/0028-agent-to-environment-schema.md" + "source_rfc": "rfcs/0028-capability-environment-schema.md" }, - "$id": "https://opencot.dev/schemas/rfc-0028-agent-environment.json" + "$id": "https://opencot.dev/schemas/rfc-0028-capability-environment.json" } diff --git a/schemas/rfc-0029-agent-benchmark-dataset.json b/schemas/rfc-0029-cognitive-benchmark-dataset.json similarity index 57% rename from schemas/rfc-0029-agent-benchmark-dataset.json rename to schemas/rfc-0029-cognitive-benchmark-dataset.json index 8651eca..138f9f7 100644 --- a/schemas/rfc-0029-agent-benchmark-dataset.json +++ b/schemas/rfc-0029-cognitive-benchmark-dataset.json @@ -1,14 +1,14 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "RFC 0029 — Agent Benchmark Dataset (stub)", + "title": "RFC 0029 — Cognitive Benchmark Dataset (stub)", "description": "Placeholder JSON Schema for RFC 0029. 
The RFC does not yet contain an extractable JSON Schema block; evolve this file as the RFC stabilizes.", "type": "object", "additionalProperties": true, "x-opencot": { "rfc": "0029", - "shortname": "agent_benchmark_dataset", + "shortname": "cognitive_benchmark_dataset", "status": "stub", - "source_rfc": "rfcs/0029-agent-benchmark-dataset.md" + "source_rfc": "rfcs/0029-cognitive-benchmark-dataset.md" }, - "$id": "https://opencot.dev/schemas/rfc-0029-agent-benchmark-dataset.json" + "$id": "https://opencot.dev/schemas/rfc-0029-cognitive-benchmark-dataset.json" } diff --git a/schemas/rfc-0030-agent-lifecycle-versioning.json b/schemas/rfc-0030-runtime-lifecycle-versioning.json similarity index 90% rename from schemas/rfc-0030-agent-lifecycle-versioning.json rename to schemas/rfc-0030-runtime-lifecycle-versioning.json index 0d2ec42..278613d 100644 --- a/schemas/rfc-0030-agent-lifecycle-versioning.json +++ b/schemas/rfc-0030-runtime-lifecycle-versioning.json @@ -1,13 +1,13 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://opencot.dev/schema/agent-lifecycle/v0.2", - "title": "Open CoT RFC 0030 — Agent Lifecycle", + "$id": "https://opencot.dev/schema/runtime-lifecycle/v0.2", + "title": "Open CoT RFC 0030 — Cognitive pipeline Lifecycle", "definitions": { "agent_lifecycle": { "type": "object", "additionalProperties": false, "required": [ - "agent_id", + "requester_id", "version", "lifecycle_state", "created_at", @@ -18,7 +18,7 @@ "governance_ref" ], "properties": { - "agent_id": { + "requester_id": { "type": "string", "minLength": 1 }, @@ -125,7 +125,7 @@ ], "x-opencot": { "rfc": "0030", - "shortname": "agent_lifecycle_versioning", - "source_rfc": "rfcs/0030-agent-lifecycle-versioning.md" + "shortname": "runtime_lifecycle_versioning", + "source_rfc": "rfcs/0030-runtime-lifecycle-versioning.md" } } diff --git a/schemas/rfc-0031-agent-observability-telemetry.json b/schemas/rfc-0031-cognitive-observability-telemetry.json similarity index 79% rename 
from schemas/rfc-0031-agent-observability-telemetry.json rename to schemas/rfc-0031-cognitive-observability-telemetry.json index 327d0f5..3f49ea5 100644 --- a/schemas/rfc-0031-agent-observability-telemetry.json +++ b/schemas/rfc-0031-cognitive-observability-telemetry.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Open CoT RFC 0031 — Agent Observability and Telemetry", + "title": "Open CoT RFC 0031 — Cognitive pipeline Observability and Telemetry", "type": "object", "properties": { "version": { @@ -9,7 +9,7 @@ "0.2" ] }, - "agent_id": { + "requester_id": { "type": "string" }, "observed_at": { @@ -61,15 +61,15 @@ }, "required": [ "version", - "agent_id", + "requester_id", "observed_at", "ordering", "metrics" ], - "$id": "https://opencot.dev/schemas/rfc-0031-agent-observability-telemetry.json", + "$id": "https://opencot.dev/schemas/rfc-0031-cognitive-observability-telemetry.json", "x-opencot": { "rfc": "0031", - "shortname": "agent_observability_telemetry", - "source_rfc": "rfcs/0031-agent-observability-telemtry.md" + "shortname": "cognitive_observability_telemetry", + "source_rfc": "rfcs/0031-cognitive-observability-telemetry.md" } } diff --git a/schemas/rfc-0032-agent-deployment-manifest.json b/schemas/rfc-0032-runtime-deployment-manifest.json similarity index 94% rename from schemas/rfc-0032-agent-deployment-manifest.json rename to schemas/rfc-0032-runtime-deployment-manifest.json index 9b38089..6f646e2 100644 --- a/schemas/rfc-0032-agent-deployment-manifest.json +++ b/schemas/rfc-0032-runtime-deployment-manifest.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://opencot.dev/schema/rfc0032/deployment-manifest.json", - "title": "Open CoT RFC 0032 — Agent Deployment Manifest", + "title": "Open CoT RFC 0032 — Cognitive pipeline Deployment Manifest", "type": "object", "additionalProperties": false, "$defs": { @@ -117,7 +117,7 @@ "kind": { "type": "string", "enum": [ - "agent", + 
"pipeline", "dataset", "tool_pack", "model", @@ -147,7 +147,7 @@ "type": "string", "minLength": 1 }, - "agent_id": { + "requester_id": { "type": "string", "minLength": 1 }, @@ -207,7 +207,7 @@ "required": [ "schema_version", "manifest_id", - "agent_id", + "requester_id", "version", "runtime", "policies", @@ -229,7 +229,7 @@ ], "x-opencot": { "rfc": "0032", - "shortname": "agent_deployment_manifest", - "source_rfc": "rfcs/0032-agent-deployment-manifest.md" + "shortname": "runtime_deployment_manifest", + "source_rfc": "rfcs/0032-runtime-deployment-manifest.md" } } diff --git a/schemas/rfc-0033-agent-security-threat-model.json b/schemas/rfc-0033-runtime-security-threat-model.json similarity index 96% rename from schemas/rfc-0033-agent-security-threat-model.json rename to schemas/rfc-0033-runtime-security-threat-model.json index 97087ba..e5ec593 100644 --- a/schemas/rfc-0033-agent-security-threat-model.json +++ b/schemas/rfc-0033-runtime-security-threat-model.json @@ -129,7 +129,7 @@ ], "x-opencot": { "rfc": "0033", - "shortname": "agent_security_threat_model", - "source_rfc": "rfcs/0033-agent-security-threat-model.md" + "shortname": "runtime_security_threat_model", + "source_rfc": "rfcs/0033-runtime-security-threat-model.md" } } diff --git a/schemas/rfc-0034-agent-federation-protocol.json b/schemas/rfc-0034-cognitive-federation-protocol.json similarity index 95% rename from schemas/rfc-0034-agent-federation-protocol.json rename to schemas/rfc-0034-cognitive-federation-protocol.json index 4417a5a..a88f2d4 100644 --- a/schemas/rfc-0034-agent-federation-protocol.json +++ b/schemas/rfc-0034-cognitive-federation-protocol.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://opencot.dev/schema/rfc0034/federation.json", - "title": "Open CoT RFC 0034 — Agent Federation Protocol", + "title": "Open CoT RFC 0034 — Cognitive pipeline Federation Protocol", "type": "object", "additionalProperties": false, "$defs": { @@ -175,7 +175,7 @@ }, 
"x-opencot": { "rfc": "0034", - "shortname": "agent_federation_protocol", - "source_rfc": "rfcs/0034-agent-federation-protocol.md" + "shortname": "cognitive_federation_protocol", + "source_rfc": "rfcs/0034-cognitive-federation-protocol.md" } } diff --git a/schemas/rfc-0035-data-provenance-tracking.json b/schemas/rfc-0035-data-provenance-tracking.json index 3a28ced..ccebff5 100644 --- a/schemas/rfc-0035-data-provenance-tracking.json +++ b/schemas/rfc-0035-data-provenance-tracking.json @@ -18,7 +18,7 @@ "source": { "type": "string" }, - "agent_id": { + "requester_id": { "type": "string" }, "tool_id": { diff --git a/schemas/rfc-0036-agent-native-compression-delta.json b/schemas/rfc-0036-cognitive-native-compression-delta.json similarity index 54% rename from schemas/rfc-0036-agent-native-compression-delta.json rename to schemas/rfc-0036-cognitive-native-compression-delta.json index cf2f04f..441b736 100644 --- a/schemas/rfc-0036-agent-native-compression-delta.json +++ b/schemas/rfc-0036-cognitive-native-compression-delta.json @@ -1,14 +1,14 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "RFC 0036 — Agent Native Compression Delta (stub)", + "title": "RFC 0036 — Cognitive Native Compression Delta (stub)", "description": "Placeholder JSON Schema for RFC 0036. 
The RFC does not yet contain an extractable JSON Schema block; evolve this file as the RFC stabilizes.", "type": "object", "additionalProperties": true, "x-opencot": { "rfc": "0036", - "shortname": "agent_native_compression_delta", + "shortname": "cognitive_native_compression_delta", "status": "stub", - "source_rfc": "rfcs/0036-agent-native-compression-delta.md" + "source_rfc": "rfcs/0036-cognitive-native-compression-delta.md" }, - "$id": "https://opencot.dev/schemas/rfc-0036-agent-native-compression-delta.json" + "$id": "https://opencot.dev/schemas/rfc-0036-cognitive-native-compression-delta.json" } diff --git a/schemas/rfc-0038-cost-aware-reasoning-budget.json b/schemas/rfc-0038-cost-aware-reasoning-budget.json index 4e529c6..e8e12a4 100644 --- a/schemas/rfc-0038-cost-aware-reasoning-budget.json +++ b/schemas/rfc-0038-cost-aware-reasoning-budget.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "title": "Open CoT RFC 0038 — Cost-Aware Reasoning Budget", - "description": "Budget policy and snapshot types for enforcing token, cost, step, tool-call, and retry limits on agent loops.", + "description": "Budget policy and snapshot types for enforcing token, cost, step, tool-call, and retry limits on cognitive pipelines.", "type": "object", "properties": { "budget": { @@ -16,7 +16,7 @@ "max_cost": { "type": "number", "minimum": 0, - "description": "Maximum dollar cost for the entire agent run." + "description": "Maximum dollar cost for the entire cognitive pipeline run." }, "max_steps": { "type": "integer", @@ -46,7 +46,7 @@ "soft", "warn" ], - "description": "How the budget is enforced. 'hard' force-stops the agent, 'soft' logs warnings, 'warn' emits telemetry only." + "description": "How the budget is enforced. 'hard' force-stops the cognitive pipeline, 'soft' logs warnings, 'warn' emits telemetry only." 
}, "snapshot": { "type": "object", diff --git a/schemas/rfc-0040-multi-agent-economic-incentives.json b/schemas/rfc-0040-multi-party-economic-incentives.json similarity index 59% rename from schemas/rfc-0040-multi-agent-economic-incentives.json rename to schemas/rfc-0040-multi-party-economic-incentives.json index 41f1caf..3d3bd67 100644 --- a/schemas/rfc-0040-multi-agent-economic-incentives.json +++ b/schemas/rfc-0040-multi-party-economic-incentives.json @@ -1,14 +1,14 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "RFC 0040 — Multi Agent Economic Incentives (stub)", + "title": "RFC 0040 — Multi Party Economic Incentives (stub)", "description": "Placeholder JSON Schema for RFC 0040. The RFC does not yet contain an extractable JSON Schema block; evolve this file as the RFC stabilizes.", "type": "object", "additionalProperties": true, "x-opencot": { "rfc": "0040", - "shortname": "multi_agent_economic_incentives", + "shortname": "multi_party_economic_incentives", "status": "stub", - "source_rfc": "rfcs/0040-multi-agent-economic-incentives.md" + "source_rfc": "rfcs/0040-multi-party-economic-incentives.md" }, - "$id": "https://opencot.dev/schemas/rfc-0040-multi-agent-economic-incentives.json" + "$id": "https://opencot.dev/schemas/rfc-0040-multi-party-economic-incentives.json" } diff --git a/schemas/rfc-0043-auditing-compliance-logs.json b/schemas/rfc-0043-auditing-compliance-logs.json index e36d9e2..32a14a0 100644 --- a/schemas/rfc-0043-auditing-compliance-logs.json +++ b/schemas/rfc-0043-auditing-compliance-logs.json @@ -18,7 +18,7 @@ "type": "string", "minLength": 1 }, - "agent_id": { + "requester_id": { "type": "string", "minLength": 1 }, @@ -106,7 +106,7 @@ "schema_version", "event_id", "run_id", - "agent_id", + "requester_id", "observed_at", "event_type", "details", diff --git a/schemas/rfc-0044-governance-organizational-controls.json b/schemas/rfc-0044-governance-organizational-controls.json index 315d4ee..063b226 100644 --- 
a/schemas/rfc-0044-governance-organizational-controls.json +++ b/schemas/rfc-0044-governance-organizational-controls.json @@ -21,7 +21,7 @@ "global", "organization", "team", - "agent" + "pipeline" ] }, "scope_id": { diff --git a/schemas/rfc-0045-ethics.json b/schemas/rfc-0045-ethics.json index 69709ba..42e236c 100644 --- a/schemas/rfc-0045-ethics.json +++ b/schemas/rfc-0045-ethics.json @@ -91,6 +91,6 @@ "x-opencot": { "rfc": "0045", "shortname": "ethics", - "source_rfc": "rfcs/0045-ethical-risk-contraints-reasoning-agents.md" + "source_rfc": "rfcs/0045-ethics-risk-constraints-cognitive-runtimes.md" } } diff --git a/schemas/rfc-0048-execution-receipts-audit-envelopes.json b/schemas/rfc-0048-execution-receipts-audit-envelopes.json index 24d48d5..8810b46 100644 --- a/schemas/rfc-0048-execution-receipts-audit-envelopes.json +++ b/schemas/rfc-0048-execution-receipts-audit-envelopes.json @@ -262,7 +262,7 @@ "type": "string", "minLength": 1 }, - "agent_id": { + "requester_id": { "type": "string", "minLength": 1 }, @@ -400,7 +400,7 @@ "schema_version", "envelope_id", "run_id", - "agent_id", + "requester_id", "task_hash", "started_at", "completed_at", diff --git a/schemas/rfc-0049-capability-manifest.json b/schemas/rfc-0049-capability-manifest.json index 9068f8a..fb6d341 100644 --- a/schemas/rfc-0049-capability-manifest.json +++ b/schemas/rfc-0049-capability-manifest.json @@ -7,7 +7,7 @@ "required": [ "manifest_id", "run_id", - "agent_id", + "requester_id", "timestamp", "phase", "tools", @@ -23,7 +23,7 @@ "type": "string", "minLength": 1 }, - "agent_id": { + "requester_id": { "type": "string", "minLength": 1 }, diff --git a/schemas/rfc-0052-cognitive-artifact.json b/schemas/rfc-0052-cognitive-artifact.json index 6a8814d..d5fef7d 100644 --- a/schemas/rfc-0052-cognitive-artifact.json +++ b/schemas/rfc-0052-cognitive-artifact.json @@ -136,7 +136,8 @@ "required": [ "step_id", "kind", - "content" + "content", + "visibility" ], "properties": { "step_id": { @@ -156,6 +157,17 @@ 
"content": { "type": "string" }, + "visibility": { + "type": "string", + "enum": [ + "audit_summary", + "detailed_evidence", + "redacted" + ] + }, + "redaction_reason": { + "type": "string" + }, "confidence": { "type": "number", "minimum": 0, @@ -163,6 +175,41 @@ } } }, + "reasoning_trace": { + "type": "object", + "additionalProperties": false, + "required": [ + "evidence_mode", + "summary", + "steps" + ], + "properties": { + "evidence_mode": { + "type": "string", + "enum": [ + "audit_summary", + "detailed_evidence", + "redacted_evidence" + ] + }, + "summary": { + "type": "string", + "description": "Audit-safe explanation of the cognitive path. This is evidence, not authority." + }, + "steps": { + "type": "array", + "items": { + "$ref": "#/$defs/reasoning_trace_step" + } + }, + "contains_sensitive_content": { + "type": "boolean" + }, + "redaction_reason": { + "type": "string" + } + } + }, "execution_intent": { "type": "object", "additionalProperties": false, @@ -308,10 +355,7 @@ } }, "reasoning_trace": { - "type": "array", - "items": { - "$ref": "#/$defs/reasoning_trace_step" - } + "$ref": "#/$defs/reasoning_trace" }, "execution_intent": { "type": "array", diff --git a/tests/test_mock_harness.py b/tests/test_mock_harness.py index 22e05e9..a24ab4f 100644 --- a/tests/test_mock_harness.py +++ b/tests/test_mock_harness.py @@ -7,13 +7,13 @@ if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) -from reference.python.agent_loop_runner import run_mock_agent_loop # noqa: E402 +from reference.python.cognitive_pipeline_runner import run_mock_cognitive_pipeline # noqa: E402 from reference.python.validator import validate_trace # noqa: E402 -def test_mock_agent_loop_trace_validates() -> None: +def test_mock_cognitive_pipeline_trace_validates() -> None: task = "Find the population of Tokyo and compute its square root." 
- trace, verifier_output = run_mock_agent_loop(task) + trace, verifier_output = run_mock_cognitive_pipeline(task) validate_trace(trace) assert verifier_output["version"] == "0.1" assert trace["final_answer"] diff --git a/tools/diff_checker.py b/tools/diff_checker.py index 2997463..a1bc0ce 100644 --- a/tools/diff_checker.py +++ b/tools/diff_checker.py @@ -15,7 +15,7 @@ - minor (new optional capability) - patch (non-semantic or informational) -These severities inform, but do not automatically decide, registry semver bumps. +These severities inform, but do not automatically reconcile, registry semver bumps. """ from __future__ import annotations @@ -27,6 +27,11 @@ from typing import Any SEVERITY_ORDER = {"patch": 0, "minor": 1, "major": 2} +PROPERTY_RENAMES = { + "agent_id": "requester_id", + "agents": "pipelines", + "parent_agent_id": "parent_requester_id", +} def _norm_type(t: Any) -> str | None: @@ -59,6 +64,12 @@ def _record(findings: list[tuple[str, str]], severity: str, msg: str) -> None: findings.append((severity, msg)) +def _renamed_properties(before_keys: set[str], after_keys: set[str]) -> dict[str, str]: + return { + before: after for before, after in PROPERTY_RENAMES.items() if before in before_keys and after in after_keys + } + + def _tightened_min(before: dict[str, Any], after: dict[str, Any], key: str) -> bool: b = before.get(key) a = after.get(key) @@ -130,16 +141,22 @@ def _compare(before: Any, after: Any, path: str, *, findings: list[tuple[str, st b_req = set(_required_list(before)) a_req = set(_required_list(after)) - for name in sorted(b_req - a_req): + required_renames = _renamed_properties(b_req, a_req) + renamed_before_required = set(required_renames) + renamed_after_required = set(required_renames.values()) + for name in sorted((b_req - a_req) - renamed_before_required): _record(findings, "major", f"{path}: removed from required: {name!r}") - for name in sorted(a_req - b_req): + for name in sorted((a_req - b_req) - renamed_after_required): 
_record(findings, "minor", f"{path}: added to required: {name!r}") b_props = _props(before) a_props = _props(after) - for key in sorted(set(b_props) - set(a_props)): + property_renames = _renamed_properties(set(b_props), set(a_props)) + renamed_before_props = set(property_renames) + renamed_after_props = set(property_renames.values()) + for key in sorted((set(b_props) - set(a_props)) - renamed_before_props): _record(findings, "major", f"{path}: removed property {key!r}") - for key in sorted(set(a_props) - set(b_props)): + for key in sorted((set(a_props) - set(b_props)) - renamed_after_props): _record(findings, "minor", f"{path}: added property {key!r}") _constraint_diffs(before, after, path, findings) @@ -153,6 +170,15 @@ def _compare(before: Any, after: Any, path: str, *, findings: list[tuple[str, st elif isinstance(bp, dict) != isinstance(ap, dict): _record(findings, "major", f"{sub}: property shape changed (object vs non-object)") + for before_key, after_key in sorted(property_renames.items()): + bp = b_props[before_key] + ap = a_props[after_key] + sub = f"{path}.properties.{before_key}->{after_key}" + if isinstance(bp, dict) and isinstance(ap, dict): + _compare(bp, ap, sub, findings=findings) + elif isinstance(bp, dict) != isinstance(ap, dict): + _record(findings, "major", f"{sub}: property shape changed (object vs non-object)") + # Recurse into item and additionalProperties schemas when both are schema objects. 
b_items = before.get("items") a_items = after.get("items") @@ -169,6 +195,25 @@ def load_schema(path: Path) -> Any: return json.loads(path.read_text(encoding="utf-8")) +def schema_identity(path: Path) -> str: + data = load_schema(path) + meta = data.get("x-opencot") if isinstance(data, dict) else None + rfc = meta.get("rfc") if isinstance(meta, dict) else None + return f"rfc:{rfc}" if isinstance(rfc, str) and rfc else f"name:{path.name}" + + +def index_schema_dir(path: Path) -> dict[str, Path]: + indexed: dict[str, Path] = {} + for schema_path in path.glob("*.json"): + if schema_path.name == "registry.json": + continue + identity = schema_identity(schema_path) + if identity in indexed: + raise RuntimeError(f"Duplicate schema identity {identity!r}: {indexed[identity].name}, {schema_path.name}") + indexed[identity] = schema_path + return indexed + + def compare_files(before: Path, after: Path) -> list[tuple[str, str]]: findings: list[tuple[str, str]] = [] _compare(load_schema(before), load_schema(after), before.name, findings=findings) @@ -197,14 +242,14 @@ def main() -> int: if args.before.is_file() and args.after.is_file(): findings.extend(compare_files(args.before, args.after)) elif args.before.is_dir() and args.after.is_dir(): - b_names = {p.name for p in args.before.glob("*.json")} - a_names = {p.name for p in args.after.glob("*.json")} - for removed in sorted((b_names - a_names) - {"registry.json"}): - findings.append(("major", f"{removed}: schema file removed")) - for added in sorted((a_names - b_names) - {"registry.json"}): - findings.append(("minor", f"{added}: schema file added")) - for name in sorted((b_names & a_names) - {"registry.json"}): - findings.extend(compare_files(args.before / name, args.after / name)) + before_index = index_schema_dir(args.before) + after_index = index_schema_dir(args.after) + for removed in sorted(set(before_index) - set(after_index)): + findings.append(("major", f"{before_index[removed].name}: schema file removed")) + for 
added in sorted(set(after_index) - set(before_index)): + findings.append(("minor", f"{after_index[added].name}: schema file added")) + for identity in sorted(set(before_index) & set(after_index)): + findings.extend(compare_files(before_index[identity], after_index[identity])) else: print("before and after must both be files or both be directories", file=sys.stderr) return 2 diff --git a/tools/schema_lib.py b/tools/schema_lib.py index 1028379..c746fdc 100644 --- a/tools/schema_lib.py +++ b/tools/schema_lib.py @@ -22,40 +22,40 @@ "0004": "branching", "0005": "reward", "0006": "ensemble", - "0007": "agent_loop", + "0007": "cognitive_pipeline", "0008": "dataset_packaging", "0009": "reward_fusion", - "0010": "agent_memory", - "0011": "multi_agent_protocol", + "0010": "cognitive_context", + "0011": "multi_party_cognition_protocol", "0012": "dataset_streaming", "0013": "memory_compression", "0014": "memory_conflict_resolution", - "0015": "multi_agent_reward_sharing", + "0015": "multi_party_reward_sharing", "0016": "tool_capability_negotiation", - "0017": "agent_safety_sandboxing", + "0017": "runtime_safety_sandboxing", "0018": "tool_error_taxonomy", - "0019": "multi_agent_planning_graphs", + "0019": "collaborative_planning_graphs", "0020": "verifiable_scratchpad_compression", - "0021": "agent_capability_declaration", - "0022": "agent_evaluation_protocol", + "0021": "capability_declaration", + "0022": "cognitive_evaluation_protocol", "0023": "human_in_the_loop", "0024": "multi_modal_reasoning", "0025": "tool_marketplace_registry", - "0026": "agent_identity_auth", - "0027": "distributed_agent_execution", - "0028": "agent_environment", - "0029": "agent_benchmark_dataset", - "0030": "agent_lifecycle_versioning", - "0031": "agent_observability_telemetry", - "0032": "agent_deployment_manifest", - "0033": "agent_security_threat_model", - "0034": "agent_federation_protocol", + "0026": "requester_identity_auth", + "0027": "distributed_execution", + "0028": 
"capability_environment", + "0029": "cognitive_benchmark_dataset", + "0030": "runtime_lifecycle_versioning", + "0031": "cognitive_observability_telemetry", + "0032": "runtime_deployment_manifest", + "0033": "runtime_security_threat_model", + "0034": "cognitive_federation_protocol", "0035": "data_provenance_tracking", - "0036": "agent_native_compression_delta", + "0036": "cognitive_native_compression_delta", "0037": "token_economy_cost_modeling", "0038": "cost_aware_reasoning_budget", "0039": "tool_cost_modeling", - "0040": "multi_agent_economic_incentives", + "0040": "multi_party_economic_incentives", "0041": "policy_enforcement", "0042": "permission_acl", "0043": "auditing_compliance_logs", @@ -83,10 +83,10 @@ "0004": "branching", "0005": "reward", "0006": "ensemble", - "0007": "agent-loop", + "0007": "cognitive-pipeline", "0008": "dataset", "0009": "reward-fusion", - "0010": "memory", + "0010": "cognitive-context", } diff --git a/tools/sync_schemas_from_rfcs.py b/tools/sync_schemas_from_rfcs.py index 2441011..0913f76 100644 --- a/tools/sync_schemas_from_rfcs.py +++ b/tools/sync_schemas_from_rfcs.py @@ -57,12 +57,12 @@ def build_branching_schema(rfc_id: str) -> dict[str, Any]: } -def build_agent_loop_schema() -> dict[str, Any]: +def build_cognitive_pipeline_schema() -> dict[str, Any]: return { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Open CoT RFC 0007 — Agent loop protocol trace", + "title": "Open CoT RFC 0007 — Cognitive Pipeline Protocol Trace", "description": ( - "Reasoning trace emitted by an agent loop (RFC 0007). " + "Reasoning trace emitted by a cognitive pipeline (RFC 0007). " "Extends RFC 0001 with optional tool_invocation on action steps (RFC 0003)." 
), "allOf": [ @@ -185,7 +185,7 @@ def main() -> int: elif rfc_id == "0004": data = build_branching_schema(rfc_id) elif rfc_id == "0007": - data = build_agent_loop_schema() + data = build_cognitive_pipeline_schema() elif rfc_id == "0008": data = build_dataset_packaging_schema() else: diff --git a/tools/validate.py b/tools/validate.py index 6a22338..42c0222 100644 --- a/tools/validate.py +++ b/tools/validate.py @@ -27,7 +27,7 @@ "branching", "reward", "ensemble", - "agent_loop", + "cognitive_pipeline", "dataset_packaging", ) @@ -192,10 +192,10 @@ def _check_conformance_profiles() -> list[str]: def _cross_consistency(loaded: dict[str, dict[str, Any]]) -> list[str]: """Lightweight checks across known pairs.""" warnings: list[str] = [] - if "rfc-0007-agent-loop.json" in loaded: - s = json.dumps(loaded["rfc-0007-agent-loop.json"]) + if "rfc-0007-cognitive-pipeline.json" in loaded: + s = json.dumps(loaded["rfc-0007-cognitive-pipeline.json"]) if "rfc-0001-reasoning.json" not in s: - warnings.append("rfc-0007-agent-loop.json should reference rfc-0001-reasoning.json") + warnings.append("rfc-0007-cognitive-pipeline.json should reference rfc-0001-reasoning.json") return warnings