Commit dc5a8f0 — wave 75 to 89

1 parent 6f0bd2b
285 files changed: 44,259 additions & 2,376 deletions

.claude/settings.json

Lines changed: 231 additions & 1 deletion (large diff not rendered)

.env.example

Lines changed: 100 additions & 54 deletions
@@ -1,78 +1,124 @@
 # FormicOS environment configuration
-# Copy to .env and configure for your setup.
+# Copy to .env and set your API key. That's it.
 #
-# Default stack: llama.cpp (GPU, Blackwell-native) + Qwen3-Embedding sidecar + Qdrant.
-# See docs/DEPLOYMENT.md for the full deployment guide.
-# See docker-compose.yml for service definitions.
+# Cloud-first by default: 3 containers, no GPU needed.
+# For local GPU: uncomment the "Local GPU" section below,
+# then run: bash scripts/setup-local-gpu.sh
 
-# --- Local LLM Docker image ---
-# Default: Blackwell-native image (sm_120, CUDA 12.8). Build first:
-# bash scripts/build_llm_image.sh
-#
-# Fallback for non-Blackwell GPUs (PTX JIT, ~10x slower on RTX 5090,
-# --fit on auto-sizes context down to ~16k):
-# LLM_IMAGE=ghcr.io/ggml-org/llama.cpp:server-cuda
-
-# --- Cloud LLM API keys (optional — enables cloud model routing) ---
-# ANTHROPIC_API_KEY=sk-ant-...
+# --- Cloud API keys (set at least one) ---
+ANTHROPIC_API_KEY=sk-ant-...
 # GEMINI_API_KEY=AI...
+# OPENAI_API_KEY=sk-...
 # DEEPSEEK_API_KEY=sk-...
-# MINIMAX_API_KEY=eyJ...
 
-# --- Local LLM model file ---
-# Override the default Qwen3-30B-A3B model:
-# LLM_MODEL_FILE=Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf
+# --- Project binding (Wave 81) ---
+# Set to your project directory. Colonies will read/write against this root.
+# PROJECT_DIR=/path/to/your/project
 
-# --- Local LLM context size ---
-# Default: 80000. With --fit on, llama.cpp auto-sizes KV cache to VRAM.
-# The Blackwell image supports 80k on RTX 5090 (32 GB) with comfortable headroom.
-# The generic CUDA image falls back to ~16k via PTX JIT.
-# Must match config/formicos.yaml context_window for llama-cpp/gpt-4.
-# LLM_CONTEXT_SIZE=80000
-
-# --- Local LLM parallel slots ---
-# Number of concurrent inference slots. The adapter reads this env var
-# to set its concurrency semaphore — no manual coupling needed.
+# --- Local GPU override (uncomment entire block) ---
+# COMPOSE_PROFILES=local-gpu
+# QUEEN_MODEL=llama-cpp/qwen3.5-35b
+# CODER_MODEL=llama-cpp/qwen3.5-35b
+# REVIEWER_MODEL=llama-cpp/qwen3.5-35b
+# RESEARCHER_MODEL=llama-cpp/qwen3.5-35b
+# ARCHIVIST_MODEL=llama-cpp/qwen3.5-35b
+# LLM_HOST=http://llm:8080
+# EMBED_URL=http://formicos-embed:8200
+# EMBED_MODEL=nomic-ai/nomic-embed-text-v1.5
+# EMBED_DIMENSIONS=768
+#
+# Local LLM tuning:
+# LLM_IMAGE=local/llama.cpp:server-cuda-blackwell
+# LLM_MODEL_FILE=Qwen3.5-35B-A3B-Q4_K_M.gguf
+# LLM_MODEL_ALIAS=qwen3.5-35b
+# LLM_CHAT_TEMPLATE_ARGS=--chat-template-file /config/qwen35-chat.jinja
+# LLM_FLASH_ATTN=on
+# LLM_CACHE_TYPE_K=q4_0
+# LLM_CACHE_TYPE_V=q4_0
+# LLM_BATCH_SIZE=8192
+# LLM_UBATCH_SIZE=4096
+# LLM_CONTEXT_SIZE=65536
 # LLM_SLOTS=2
-
-# --- Slot prompt similarity ---
-# Controls how aggressively slots reuse cached prompt prefixes (0.0-1.0).
-# Higher values = more aggressive reuse. Good for multi-agent shared prompts.
 # LLM_SLOT_PROMPT_SIMILARITY=0.5
-
-# --- Prompt cache in system RAM (MB) ---
-# Stores previously computed KV cache states for prefix reuse.
-# Free performance for multi-agent workloads with shared system prompts.
 # LLM_CACHE_RAM=1024
-
-# --- Embedding GPU layers ---
-# Set to 0 to move embedding model to CPU, freeing ~700 MB VRAM.
-# Useful if VRAM is tight at 131k context.
 # EMBED_GPU_LAYERS=99
+# CUDA_DEVICE=0
 
-# --- Local LLM port (host-side) ---
-# LLM_PORT=8008
+# --- Devstral local eval (24B dense; safer defaults on 32 GB VRAM) ---
+# COMPOSE_PROFILES=local-gpu
+# QUEEN_MODEL=llama-cpp/devstral-small-2-24b
+# CODER_MODEL=llama-cpp/devstral-small-2-24b
+# REVIEWER_MODEL=llama-cpp/devstral-small-2-24b
+# RESEARCHER_MODEL=llama-cpp/devstral-small-2-24b
+# ARCHIVIST_MODEL=llama-cpp/devstral-small-2-24b
+# FORMICOS_ENV_FILE=.env.devstral
+# LLM_HOST=http://llm:8080
+# EMBED_URL=http://formicos-embed:8200
+# LLM_MODEL_FILE=mistralai_Devstral-Small-2-24B-Instruct-2512-Q4_K_M.gguf
+# LLM_MODEL_ALIAS=devstral-small-2-24b
+# LLM_CHAT_TEMPLATE_ARGS=
+# LLM_FLASH_ATTN=off
+# LLM_CACHE_TYPE_K=f16
+# LLM_CACHE_TYPE_V=f16
+# LLM_BATCH_SIZE=4096
+# LLM_UBATCH_SIZE=2048
+# LLM_CACHE_RAM=0
+# LLM_CONTEXT_SIZE=32768
+# LLM_SLOTS=3
 
-# --- Model directory (shared by LLM and embedding sidecar) ---
-# LLM_MODEL_DIR=./.models
+# --- Hybrid routing (GPU + API keys — RECOMMENDED for local GPU users) ---
+# Queen on cloud (unlimited context), colonies on local GPU (fast parallel).
+# COMPOSE_PROFILES=local-gpu
+# QUEEN_MODEL=anthropic/claude-sonnet-4-6
+# CODER_MODEL=llama-cpp/qwen3.5-35b
+# REVIEWER_MODEL=llama-cpp/qwen3.5-35b
+# RESEARCHER_MODEL=anthropic/claude-haiku-4-5
+# ARCHIVIST_MODEL=llama-cpp/qwen3.5-35b
+# LLM_SLOTS=3
 
-# --- GPU device index ---
-# Pin LLM + embedding to a specific GPU. Essential on multi-GPU systems.
-# Docker Desktop / WSL2 ignores device_ids in compose deploy blocks;
-# CUDA_VISIBLE_DEVICES (set via this variable) is the effective control.
-# Without pinning, llama.cpp may split layers across GPUs causing segfaults.
+# --- Multi-GPU pinning ---
+# Each GPU-using service has its own device variable.
+# Default: everything on GPU 0. Multi-GPU splits the load.
+#
+# Single GPU (default):
+# CUDA_DEVICE=0
+#
+# Multi-GPU (recommended for 2+ GPUs):
+# GPU 0 (primary, e.g. RTX 5090): Queen model only — full VRAM for large context
+# GPU 1 (secondary, e.g. RTX 3080): Swarm workers + embedding — uses multi-arch image
 # CUDA_DEVICE=0
+# CUDA_DEVICE_SWARM=1
+# CUDA_DEVICE_EMBED=1
+# EMBED_IMAGE=ghcr.io/ggml-org/llama.cpp:server-cuda
+#
+# The swarm image defaults to the official multi-arch build (ghcr.io/ggml-org/llama.cpp:server-cuda)
+# which runs on any CUDA GPU. Override LLM_SWARM_IMAGE for a native build on specific hardware.
+
+# --- Local Swarm (parallel colony workers on a second llama.cpp instance) ---
+# Setup: bash scripts/setup-local-swarm.sh
+# Start: docker compose -f docker-compose.yml -f docker-compose.local-swarm.yml up -d
+#
+# Deep Queen (RECOMMENDED for multi-GPU):
+# Queen gets full 65K context on GPU 0, 4 parallel workers on GPU 1.
+# GPU 0 VRAM: ~23GB (35B weights + bf16 KV). GPU 1 VRAM: ~8.7GB (4B + embed).
+# LLM_SWARM_HOST=http://llm-swarm:8080
+# LLM_SLOTS=1
+# LLM_CONTEXT_SIZE=65536
+# LLM_SWARM_CTX=128000
+# LLM_SWARM_SLOTS=4
+# CODER_MODEL=llama-cpp-swarm/qwen3.5-4b-swarm
+# REVIEWER_MODEL=llama-cpp-swarm/qwen3.5-4b-swarm
+# ARCHIVIST_MODEL=llama-cpp-swarm/qwen3.5-4b-swarm
 
 # --- Sandbox execution ---
 # Set to false to disable Docker sandbox container spawning (code_execute tool).
-# Also remove the /var/run/docker.sock mount from docker-compose.yml if you
-# want to opt out of Docker daemon access entirely.
 # SANDBOX_ENABLED=true
 
 # --- Data directory ---
 # Default: ./data in development, /data in Docker.
 # IMPORTANT: Use named Docker volumes for SQLite persistence.
-# Never bind-mount the SQLite database on macOS/Windows Docker Desktop —
-# WAL mode requires POSIX shared-memory semantics that don't translate
-# through Docker Desktop's filesystem layer.
 # FORMICOS_DATA_DIR=./data
+
+# --- Benchmark directory (dev only) ---
+# Mount a benchmark exercises directory into the container.
+# BENCHMARK_DIR=/path/to/polyglot-benchmark
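An illustrative sketch of how the role-to-model variables above (QUEEN_MODEL, CODER_MODEL, ...) could be resolved into a routing table. The `parse_env` helper and the `cloud-default` fallback are hypothetical, not FormicOS code; variable names mirror this file.

```python
def parse_env(text: str) -> dict[str, str]:
    """Parse KEY=VALUE lines, skipping blanks and # comments."""
    env: dict[str, str] = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue  # commented-out entries stay inactive
        key, _, value = line.partition("=")
        env[key.strip()] = value.strip()
    return env

sample = """\
ANTHROPIC_API_KEY=sk-ant-xxx
QUEEN_MODEL=anthropic/claude-sonnet-4-6
CODER_MODEL=llama-cpp/qwen3.5-35b
# REVIEWER_MODEL=llama-cpp/qwen3.5-35b
"""

env = parse_env(sample)
# Roles missing from .env fall back to a default (hypothetical here).
routing = {role: env.get(f"{role}_MODEL", "cloud-default")
           for role in ("QUEEN", "CODER", "REVIEWER")}
print(routing)
```

This mirrors the hybrid-routing idea in the file: each role gets its own model string, and commenting a line out simply drops it from the parsed environment.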

.gitignore

Lines changed: 15 additions & 0 deletions
@@ -51,3 +51,18 @@ lint-imports-*
 
 # Lock file (committed but generated)
 # uv.lock
+
+# TLS certs (mkcert -- never commit private keys)
+certs/
+.certs/
+
+# Test mock artifacts
+MagicMock/
+
+# Temp directories
+.codex_tmp/
+.tmp_pytest/
+.tmp_runtime_tests/
+
+# Ruff cache
+.ruff_cache/

.tmp_inspect_db.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+import sqlite3, sys
+conn = sqlite3.connect(sys.argv[1])
+c = conn.cursor()
+c.execute("SELECT name FROM sqlite_master WHERE type='table'")
+tables = [t[0] for t in c.fetchall()]
+print("Tables:", tables)
+for t in tables:
+    c.execute("SELECT * FROM " + t + " LIMIT 5")
+    cols = [d[0] for d in c.description]
+    rows = c.fetchall()
+    if rows:
+        print("--- " + t + " (" + str(cols) + ") ---")
+        for r in rows:
+            print(r)
+conn.close()
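The throwaway script above concatenates table names straight into SQL. That is tolerable only because the names come from `sqlite_master`; a variant that quotes identifiers is more robust. A sketch against an in-memory database (the `events` table here is illustrative, not the repo schema):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE events (id INTEGER, kind TEXT)")
conn.execute("INSERT INTO events VALUES (1, 'TokensConsumed')")

c = conn.cursor()
c.execute("SELECT name FROM sqlite_master WHERE type='table'")
tables = [t[0] for t in c.fetchall()]

for t in tables:
    # Double-quote the identifier (doubling embedded quotes) rather than
    # concatenating it bare into the statement.
    quoted = '"' + t.replace('"', '""') + '"'
    c.execute(f"SELECT * FROM {quoted} LIMIT 5")
    cols = [d[0] for d in c.description]
    print(t, cols, c.fetchall())
conn.close()
```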

CLAUDE.md

Lines changed: 78 additions & 7 deletions
@@ -2,13 +2,29 @@
 
 Open-source Python system: AI agents coordinate through shared environmental
 signals (pheromones), not direct messaging. Tree-structured data model.
-Event-sourced (69 events, closed union). Single operator. Local-first with
+Event-sourced (70 events, closed union). Single operator. Local-first with
 cloud model support. Bayesian knowledge metabolism with Thompson Sampling
 retrieval. Federated knowledge exchange via Computational CRDTs.
-Multi-colony orchestration via DelegationPlan DAG parallelism.
-MCP developer bridge (27 tools, 9 resources, 6 prompts) for Claude Code
-integration. Queen Command & Control surface with behavioral overrides,
-display board, tool tracking, and context budget visibility.
+Multi-colony orchestration via DelegationPlan DAG parallelism with deferred
+group dispatch. MCP developer bridge (29 tools, 12 resources, 8 prompts)
+for Claude Code integration. Queen Command & Control surface with behavioral
+overrides, display board, tool tracking, and context budget visibility.
+Planning workbench with structural analysis, replay-derived capability
+calibration, deterministic reviewed-plan validation and dispatch, saved plan
+patterns, and DAG editing. Live planning policy (`planning_policy.py`) as
+the Queen routing authority with fast_path enforcement at the execution
+layer. Project binding with real-repo codebase indexing (14K+ chunks).
+Production local profile: Qwen3.5-35B MoE (0.804 quality on fast_path
+tasks, 5/5 real-repo tasks completed, zero hangs).
+
+## Knowledge base
+
+The FormicOS knowledge system contains 100+ entries covering agent
+architecture state of the art (loop patterns, tool calling, context
+engineering, multi-agent coordination, production deployment, evaluation).
+Before making architectural choices, search the knowledge base via the
+`search_knowledge` MCP tool or the Queen's `memory_search` tool. Cite
+relevant entries by title when they influence design decisions.
 
 ## Architecture
 
@@ -275,6 +291,48 @@ not a DAG; they are Queen scaffolding. When a colony completes a step, the
 system prompts the Queen with the next pending step via the follow_up_colony
 summary.
 
+### Metering and billing (Wave 75)
+
+Token metering aggregates `TokensConsumed` events per billing period with
+chain-hash integrity. `formicos billing` CLI subgroup (status, estimate,
+attest, history, self-test). Attestations are deterministic and stored in
+`data_dir/attestations/`. The `metering.py` surface module computes fees
+from tiered token thresholds. `scripts/attribution.py` computes contributor
+revenue-share proportions from git history.
+
+### A2A economic contracts (Wave 75)
+
+Task receipts (`surface/task_receipts.py`) produce deterministic cost/quality
+summaries for completed A2A work. `get_task_receipt` MCP tool and
+`formicos://receipt/{task_id}` resource expose receipts to clients.
+`search_knowledge` MCP tool provides full-pipeline retrieval from external
+clients (semantic + Thompson + freshness + co-occurrence + graph proximity).
+
+### Structural integrity (Wave 76)
+
+16 correctness fixes across 3 teams (data truth, operational safety,
+context integrity). No new features -- fixes silent errors and race
+conditions that would surface under real multi-client load.
+
+Data truth: `BudgetSnapshot.total_tokens` includes reasoning tokens.
+Agent-to-colony reverse index (`_agent_colony_index`) in ProjectionStore
+for O(1) token attribution. Daily spend persistence to disk with reload
+on restart. Budget reconciliation (estimated vs actual colony cost)
+wired through `_post_colony_hooks`.
+
+Operational safety: Action queue compaction preserves `pending_review`
+items. State transition validation via `_VALID_TRANSITIONS` map (409 on
+invalid). Operational sweep reentrancy guard (`asyncio.Lock`).
+Kill/completion race guard at both colony completion paths. Journal
+entries for all approval/execution branches. Operator-idle detection
+includes Queen thread messages.
+
+Context integrity: Budget caps on memory retrieval, notes, and thread
+context injections. Workspace-scoped session and plan paths with
+migration fallback. Queen chat workspace propagation across all 4
+dispatch sites. Settings and queen-overview workspace resolution via
+`activeWorkspaceId` property.
+
 ## Tech stack
 
 Use Python 3.12+, uv, Pydantic v2 (sole serialization), asyncio, httpx,
@@ -483,15 +541,28 @@ IMPORTANT: These are non-negotiable. Violating any of these requires operator ap
 | `surface/self_maintenance.py` | MaintenanceDispatcher, autonomy policy, blast radius, autonomy scoring | Self-maintenance |
 | `surface/project_plan.py` | Project plan parser/helper, milestone tools, plan rendering | Project plan |
 | `surface/queen_budget.py` | 9-slot proportional Queen context budget (ADR-051) | Queen budget |
-| `surface/queen_tools.py` | Queen tool dispatch (42 tools), spawn_parallel, DelegationPlan | Queen tools |
+| `surface/queen_tools.py` | Queen tool dispatch (~45 tools, dynamic toolsets), spawn_parallel, DelegationPlan | Queen tools |
 | `surface/transcript_view.py` | Canonical colony transcript schema | A2A/MCP export |
 | `surface/proactive_intelligence.py` | 17 deterministic briefing rules (7 knowledge + 4 performance + evaporation + branching + earned autonomy + template health + outcome digest + popular unexamined) | Proactive intel |
 | `surface/routes/api.py` | REST endpoints: outcomes, create-demo, project-plan, autonomy-status, maintenance-policy, add-model | API surface |
 | `surface/workflow_learning.py` | Deterministic workflow pattern recognition + procedure suggestions (Wave 72) | Workflow learning |
 | `docs/AUTONOMOUS_OPERATIONS.md` | Autonomy operator runbook: action queue, levels, learning, controls | Reference |
 | `docs/DEVELOPER_BRIDGE.md` | Developer onboarding guide for Claude Code integration | Reference |
-| `surface/mcp_server.py` | MCP server (27 tools, 9 resources, 6 prompts) | MCP surface |
+| `surface/mcp_server.py` | MCP server (29 tools, 12 resources, 8 prompts) | MCP surface |
 | `config/templates/demo-workspace.yaml` | Demo workspace template with seeded entries | Demo path |
+| `surface/workspace_roots.py` | Project/library/runtime root resolution (Wave 81) | Project binding |
+| `surface/parallel_plans.py` | Deferred group dispatch, honest plan aggregation (Wave 81) | Parallel execution |
+| `surface/planning_signals.py` | Structured planning signal builder (Wave 82) | Planning |
+| `surface/structural_planner.py` | File matching, import coupling, grouping hints (Wave 82) | Planning |
+| `surface/capability_profiles.py` | Replay-derived capability calibration (Wave 82) | Planning |
+| `surface/reviewed_plan.py` | Reviewed-plan validation and normalization (Wave 83) | Planning workbench |
+| `surface/plan_patterns.py` | YAML-backed saved plan-pattern store (Wave 83) | Planning workbench |
+| `surface/planning_policy.py` | Consolidated routing: `decide_planning_route()` + `PlanningDecision` (Wave 85) | Queen routing |
+| `surface/commands.py` | WebSocket command handlers incl. `confirm_reviewed_plan` | WS surface |
+| `surface/metering.py` | Token metering, fee computation, attestation generation | Billing |
+| `surface/task_receipts.py` | Deterministic task receipts for A2A economic contracts | A2A economics |
+| `scripts/attribution.py` | Contributor revenue-share attribution from git history | Billing |
+| `docs/waves/wave_81/real_repo_task_pack.md` | Real-repo evaluation tasks (rtp-01 through rtp-05) | Benchmark |
 
 ## Common patterns
 
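The CLAUDE.md section above says token metering aggregates `TokensConsumed` events "with chain-hash integrity". A sketch of that idea, not the repo's `metering.py`: each event's hash folds in the previous digest, so editing or reordering any event changes the final value. The genesis value and event fields here are assumptions.

```python
import hashlib
import json

def chain_hash(events: list[dict]) -> str:
    digest = "0" * 64  # genesis value (assumed)
    for event in events:
        payload = json.dumps(event, sort_keys=True)  # deterministic encoding
        digest = hashlib.sha256((digest + payload).encode()).hexdigest()
    return digest

period = [
    {"type": "TokensConsumed", "tokens": 1200, "model": "qwen3.5-35b"},
    {"type": "TokensConsumed", "tokens": 800, "model": "claude-sonnet-4-6"},
]
total = sum(e["tokens"] for e in period)
print(total)  # 2000

# Tampering with any event yields a different chain hash.
tampered = [dict(period[0], tokens=999), period[1]]
print(chain_hash(period) != chain_hash(tampered))  # True
```

Determinism (same events, same hash) is what makes attestations reproducible; the tamper check is what makes the total auditable.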
Caddyfile

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+:8443 {
+    tls /certs/localhost.pem /certs/localhost-key.pem
+
+    # CORS headers for Claude Desktop connector
+    header {
+        Access-Control-Allow-Origin *
+        Access-Control-Allow-Methods "GET, POST, OPTIONS"
+        Access-Control-Allow-Headers "Content-Type, Accept, Authorization, Mcp-Session-Id"
+        Access-Control-Expose-Headers "Mcp-Session-Id"
+    }
+
+    # Handle CORS preflight
+    @options method OPTIONS
+    respond @options 204
+
+    reverse_proxy formicos:8080
+}

Dockerfile

Lines changed: 5 additions & 0 deletions
@@ -9,6 +9,11 @@ RUN npm run build
 # Stage 2: Python runtime
 FROM python:3.12-slim AS runtime
 
+# System dependencies (git required for shadow checkpoints — Wave 78)
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends git \
+    && rm -rf /var/lib/apt/lists/*
+
 # Install uv for fast dependency resolution
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
 

METERING.md

Lines changed: 24 additions & 0 deletions
@@ -8,6 +8,30 @@ the canonical method for computing Total Tokens and producing Usage
 Attestations under Tier 2 and Tier 3 Commercial Licenses.
 
 
+## Implementation status (Wave 75)
+
+**Implemented:**
+- Event-store-backed token aggregation (`surface/metering.py`)
+- Fee computation (`compute_fee`) — single source of truth
+- Unsigned v1 attestation generation
+- CLI: `formicos billing status|estimate|attest|history|self-test`
+- REST: `GET /api/v1/billing/status`
+- MCP: `formicos://billing` resource, `economic-status` prompt
+
+**Deferred:**
+- Ed25519 key derivation and signing (attestations are `"unsigned"`)
+- Billing submission endpoint (`formicos billing submit`)
+- External billing service integration
+
+**Repo truth notes:**
+- `TokensConsumed` events carry `cost` (not `cost_usd` as shown in the
+  schema example below). The aggregate uses the actual event field name.
+- `TokensConsumed` events do not carry a `provider` field. The example
+  below shows `provider` for specification completeness. The implementation
+  derives provider best-effort from model name prefixes. `by_model` is
+  canonical; `by_provider` is not available in the current event schema.
+
+
 ## What is metered
 
 **Total Tokens** is the sum of all input tokens, output tokens, and
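The diff above lists `compute_fee` as the single source of truth for fees "from tiered token thresholds". A sketch of marginal (bracket-style) tiering; the tier boundaries and rates below are invented for illustration and do not reproduce `surface/metering.py`.

```python
# Hypothetical tier table: (upper bound in tokens, USD per million tokens
# in that band). Real thresholds live in surface/metering.py.
TIERS = [
    (10_000_000, 0.0),     # first 10M tokens: free
    (100_000_000, 2.0),    # next 90M: $2 per million
    (float("inf"), 5.0),   # beyond 100M: $5 per million
]

def tiered_fee(total_tokens: int) -> float:
    """Charge each band at its own rate, like tax brackets."""
    fee, lower = 0.0, 0
    for upper, rate_per_million in TIERS:
        band = max(0, min(total_tokens, upper) - lower)
        fee += band / 1_000_000 * rate_per_million
        lower = upper
        if total_tokens <= upper:
            break
    return fee

print(tiered_fee(5_000_000))   # 0.0
print(tiered_fee(20_000_000))  # 20.0  (10M free + 10M at $2/M)
```

Marginal tiering keeps the fee continuous at each threshold, so crossing a boundary never makes the total jump.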
